Skip to main content

forme/font/
mod.rs

//! # Font Management
//!
//! Loading, parsing, and subsetting fonts for PDF embedding.
//!
//! The standard PDF fonts (Helvetica, Times, Courier) are registered by
//! default and don't require embedding. Custom TrueType/OpenType fonts can be
//! registered as well; their metrics are parsed with ttf-parser.
7
8pub mod builtin;
9pub mod fallback;
10pub mod metrics;
11pub mod subset;
12
13pub use metrics::{unicode_to_winansi, StandardFontMetrics};
14use std::collections::HashMap;
15
/// A font registry that maps font family + weight + style to font data.
///
/// Entries are looked up with [`FontRegistry::resolve`] and
/// [`FontRegistry::resolve_for_char`], and added with
/// [`FontRegistry::register`].
pub struct FontRegistry {
    /// All registered fonts, keyed by (family, weight, italic).
    fonts: HashMap<FontKey, FontData>,
}
20
/// Lookup key identifying one registered font variant.
///
/// Keys are compared field by field, so the family name must match exactly.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct FontKey {
    /// Family name, e.g. `"Helvetica"`.
    pub family: String,
    /// Numeric weight; the standard fonts are registered at 400 and 700.
    pub weight: u32,
    /// Whether this is the italic/oblique variant.
    pub italic: bool,
}
27
/// The data behind a registered font: either a standard PDF font or a
/// custom font supplied as raw bytes.
#[derive(Debug, Clone)]
pub enum FontData {
    /// One of the 14 standard PDF fonts. No embedding needed.
    Standard(StandardFont),
    /// A TrueType/OpenType font that needs to be embedded.
    Custom {
        /// Raw font file bytes, as passed to `FontRegistry::register`.
        data: Vec<u8>,
        /// Glyph IDs that are actually used (for subsetting).
        used_glyphs: Vec<u16>,
        /// Parsed metrics from ttf-parser, if available.
        /// `None` when the bytes could not be parsed as a font.
        metrics: Option<CustomFontMetrics>,
    },
}
41
/// Parsed metrics from a TrueType/OpenType font via ttf-parser.
///
/// Advances are stored in font units; divide by `units_per_em` and multiply
/// by the font size to get points (see `char_width`).
#[derive(Debug, Clone)]
pub struct CustomFontMetrics {
    /// Number of font units per em, as reported by the font.
    pub units_per_em: u16,
    /// Horizontal advance of each mapped character, in font units.
    pub advance_widths: HashMap<char, u16>,
    /// Advance used for characters missing from `advance_widths`, in font units.
    pub default_advance: u16,
    /// Ascender value as reported by the font.
    pub ascender: i16,
    /// Descender value as reported by the font.
    pub descender: i16,
    /// Maps characters to their glyph IDs in the original font.
    pub glyph_ids: HashMap<char, u16>,
}
53
54impl CustomFontMetrics {
55    /// Get the advance width of a character in points.
56    pub fn char_width(&self, ch: char, font_size: f64) -> f64 {
57        let w = self
58            .advance_widths
59            .get(&ch)
60            .copied()
61            .unwrap_or(self.default_advance);
62        (w as f64 / self.units_per_em as f64) * font_size
63    }
64
65    /// Parse metrics from font data using ttf-parser.
66    pub fn from_font_data(data: &[u8]) -> Option<Self> {
67        let face = ttf_parser::Face::parse(data, 0).ok()?;
68        let units_per_em = face.units_per_em();
69        let ascender = face.ascender();
70        let descender = face.descender();
71
72        let mut advance_widths = HashMap::new();
73        let mut glyph_ids = HashMap::new();
74        let mut default_advance = 0u16;
75
76        // Sample common characters to build width and glyph ID maps
77        for code in 32u32..=0xFFFF {
78            if let Some(ch) = char::from_u32(code) {
79                if let Some(glyph_id) = face.glyph_index(ch) {
80                    let advance = face.glyph_hor_advance(glyph_id).unwrap_or(0);
81                    advance_widths.insert(ch, advance);
82                    glyph_ids.insert(ch, glyph_id.0);
83                    if ch == ' ' {
84                        default_advance = advance;
85                    }
86                }
87            }
88        }
89
90        if default_advance == 0 {
91            default_advance = units_per_em / 2;
92        }
93
94        Some(CustomFontMetrics {
95            units_per_em,
96            advance_widths,
97            default_advance,
98            ascender,
99            descender,
100            glyph_ids,
101        })
102    }
103}
104
/// The 14 standard PDF fonts.
///
/// `pdf_name` returns the name string for each variant.
#[derive(Debug, Clone, Copy)]
pub enum StandardFont {
    // Helvetica family
    Helvetica,
    HelveticaBold,
    HelveticaOblique,
    HelveticaBoldOblique,
    // Times family
    TimesRoman,
    TimesBold,
    TimesItalic,
    TimesBoldItalic,
    // Courier family
    Courier,
    CourierBold,
    CourierOblique,
    CourierBoldOblique,
    // Fonts without weight/style variants
    Symbol,
    ZapfDingbats,
}
123
124impl FontData {
125    /// Check whether this font has a glyph for the given character.
126    pub fn has_char(&self, ch: char) -> bool {
127        match self {
128            FontData::Custom {
129                metrics: Some(m), ..
130            } => m.glyph_ids.contains_key(&ch),
131            FontData::Custom { metrics: None, .. } => false,
132            FontData::Standard(_) => {
133                unicode_to_winansi(ch).is_some() || (ch as u32) >= 32 && (ch as u32) <= 255
134            }
135        }
136    }
137}
138
139impl StandardFont {
140    /// The PDF name for this font.
141    pub fn pdf_name(&self) -> &'static str {
142        match self {
143            Self::Helvetica => "Helvetica",
144            Self::HelveticaBold => "Helvetica-Bold",
145            Self::HelveticaOblique => "Helvetica-Oblique",
146            Self::HelveticaBoldOblique => "Helvetica-BoldOblique",
147            Self::TimesRoman => "Times-Roman",
148            Self::TimesBold => "Times-Bold",
149            Self::TimesItalic => "Times-Italic",
150            Self::TimesBoldItalic => "Times-BoldItalic",
151            Self::Courier => "Courier",
152            Self::CourierBold => "Courier-Bold",
153            Self::CourierOblique => "Courier-Oblique",
154            Self::CourierBoldOblique => "Courier-BoldOblique",
155            Self::Symbol => "Symbol",
156            Self::ZapfDingbats => "ZapfDingbats",
157        }
158    }
159}
160
161impl Default for FontRegistry {
162    fn default() -> Self {
163        Self::new()
164    }
165}
166
167impl FontRegistry {
168    pub fn new() -> Self {
169        let mut fonts = HashMap::new();
170
171        let standard_mappings = vec![
172            (("Helvetica", 400, false), StandardFont::Helvetica),
173            (("Helvetica", 700, false), StandardFont::HelveticaBold),
174            (("Helvetica", 400, true), StandardFont::HelveticaOblique),
175            (("Helvetica", 700, true), StandardFont::HelveticaBoldOblique),
176            (("Times", 400, false), StandardFont::TimesRoman),
177            (("Times", 700, false), StandardFont::TimesBold),
178            (("Times", 400, true), StandardFont::TimesItalic),
179            (("Times", 700, true), StandardFont::TimesBoldItalic),
180            (("Courier", 400, false), StandardFont::Courier),
181            (("Courier", 700, false), StandardFont::CourierBold),
182            (("Courier", 400, true), StandardFont::CourierOblique),
183            (("Courier", 700, true), StandardFont::CourierBoldOblique),
184        ];
185
186        for ((family, weight, italic), font) in standard_mappings {
187            fonts.insert(
188                FontKey {
189                    family: family.to_string(),
190                    weight,
191                    italic,
192                },
193                FontData::Standard(font),
194            );
195        }
196
197        let mut registry = Self { fonts };
198        builtin::register_builtin_fonts(&mut registry);
199        registry
200    }
201
202    /// Look up a font by family name (or comma-separated fallback chain),
203    /// falling back to Helvetica if none match.
204    ///
205    /// Supports CSS-style font family lists: `"Inter, Helvetica"` tries Inter
206    /// first, then Helvetica. Quoted families are unquoted automatically.
207    pub fn resolve(&self, families: &str, weight: u32, italic: bool) -> &FontData {
208        let snapped_weight = if weight >= 600 { 700 } else { 400 };
209
210        for family in families.split(',') {
211            let family = family.trim().trim_matches('"').trim_matches('\'');
212            if family.is_empty() {
213                continue;
214            }
215
216            // Try exact weight
217            let key = FontKey {
218                family: family.to_string(),
219                weight,
220                italic,
221            };
222            if let Some(font) = self.fonts.get(&key) {
223                return font;
224            }
225
226            // Try with normalized weight (snap to 400 or 700)
227            let key = FontKey {
228                family: family.to_string(),
229                weight: snapped_weight,
230                italic,
231            };
232            if let Some(font) = self.fonts.get(&key) {
233                return font;
234            }
235
236            // Try opposite weight (400 if bold requested, 700 if regular requested)
237            let opposite_weight = if snapped_weight == 700 { 400 } else { 700 };
238            let key = FontKey {
239                family: family.to_string(),
240                weight: opposite_weight,
241                italic,
242            };
243            if let Some(font) = self.fonts.get(&key) {
244                return font;
245            }
246        }
247
248        // Final fallback: Helvetica
249        let key = FontKey {
250            family: "Helvetica".to_string(),
251            weight: snapped_weight,
252            italic,
253        };
254        self.fonts.get(&key).unwrap_or_else(|| {
255            self.fonts
256                .get(&FontKey {
257                    family: "Helvetica".to_string(),
258                    weight: 400,
259                    italic: false,
260                })
261                .expect("Helvetica must be registered")
262        })
263    }
264
265    /// Resolve a font for a specific character from a comma-separated fallback chain.
266    ///
267    /// Walks the families in order, returning the first font that has a glyph for `ch`.
268    /// Falls back to Helvetica if no font covers the character.
269    /// Returns a tuple of (font_data, resolved_single_family_name).
270    pub fn resolve_for_char(
271        &self,
272        families: &str,
273        ch: char,
274        weight: u32,
275        italic: bool,
276    ) -> (&FontData, String) {
277        let snapped_weight = if weight >= 600 { 700 } else { 400 };
278
279        for family in families.split(',') {
280            let family = family.trim().trim_matches('"').trim_matches('\'');
281            if family.is_empty() {
282                continue;
283            }
284
285            // Try exact weight
286            let key = FontKey {
287                family: family.to_string(),
288                weight,
289                italic,
290            };
291            if let Some(font) = self.fonts.get(&key) {
292                if font.has_char(ch) {
293                    return (font, family.to_string());
294                }
295            }
296
297            // Try with normalized weight
298            let key = FontKey {
299                family: family.to_string(),
300                weight: snapped_weight,
301                italic,
302            };
303            if let Some(font) = self.fonts.get(&key) {
304                if font.has_char(ch) {
305                    return (font, family.to_string());
306                }
307            }
308
309            // Try opposite weight (400 if bold requested, 700 if regular requested)
310            let opposite_weight = if snapped_weight == 700 { 400 } else { 700 };
311            let key = FontKey {
312                family: family.to_string(),
313                weight: opposite_weight,
314                italic,
315            };
316            if let Some(font) = self.fonts.get(&key) {
317                if font.has_char(ch) {
318                    return (font, family.to_string());
319                }
320            }
321        }
322
323        // Try builtin Unicode font (Noto Sans) before Helvetica
324        let builtin_key = FontKey {
325            family: "Noto Sans".to_string(),
326            weight: snapped_weight,
327            italic: false,
328        };
329        if let Some(font) = self.fonts.get(&builtin_key) {
330            if font.has_char(ch) {
331                return (font, "Noto Sans".to_string());
332            }
333        }
334
335        // Final fallback: Helvetica
336        let key = FontKey {
337            family: "Helvetica".to_string(),
338            weight: snapped_weight,
339            italic,
340        };
341        let font = self.fonts.get(&key).unwrap_or_else(|| {
342            self.fonts
343                .get(&FontKey {
344                    family: "Helvetica".to_string(),
345                    weight: 400,
346                    italic: false,
347                })
348                .expect("Helvetica must be registered")
349        });
350        (font, "Helvetica".to_string())
351    }
352
353    /// Register a custom font.
354    pub fn register(&mut self, family: &str, weight: u32, italic: bool, data: Vec<u8>) {
355        let metrics = CustomFontMetrics::from_font_data(&data);
356        self.fonts.insert(
357            FontKey {
358                family: family.to_string(),
359                weight,
360                italic,
361            },
362            FontData::Custom {
363                data,
364                used_glyphs: Vec::new(),
365                metrics,
366            },
367        );
368    }
369
370    /// Iterate over all registered fonts.
371    pub fn iter(&self) -> impl Iterator<Item = (&FontKey, &FontData)> {
372        self.fonts.iter()
373    }
374}
375
/// Shared font context used by layout and PDF serialization.
/// Provides text measurement with real glyph metrics.
pub struct FontContext {
    /// Registry that all lookups and measurements are resolved against.
    registry: FontRegistry,
    /// Number of digits to use when measuring page number sentinel width.
    /// Default 2 ("00"). Updated by the two-pass render loop after the
    /// first layout reveals the actual page count.
    sentinel_digit_count: u32,
}
385
386impl Default for FontContext {
387    fn default() -> Self {
388        Self::new()
389    }
390}
391
392impl FontContext {
393    pub fn new() -> Self {
394        Self {
395            registry: FontRegistry::new(),
396            sentinel_digit_count: 2,
397        }
398    }
399
400    /// Get the current sentinel digit count.
401    pub fn sentinel_digit_count(&self) -> u32 {
402        self.sentinel_digit_count
403    }
404
405    /// Set the number of digits used to measure page number sentinel width.
406    pub fn set_sentinel_digit_count(&mut self, count: u32) {
407        self.sentinel_digit_count = count;
408    }
409
410    /// Get the advance width of a single character in points.
411    ///
412    /// When `family` contains a comma (font fallback chain), resolves the
413    /// best font for this specific character before measuring.
414    pub fn char_width(
415        &self,
416        ch: char,
417        family: &str,
418        weight: u32,
419        italic: bool,
420        font_size: f64,
421    ) -> f64 {
422        // Page placeholder sentinels: measure as the width of N zeros
423        // where N = sentinel_digit_count (set by the two-pass render loop)
424        if ch == crate::layout::PAGE_NUMBER_SENTINEL || ch == crate::layout::TOTAL_PAGES_SENTINEL {
425            return self.char_width('0', family, weight, italic, font_size)
426                * self.sentinel_digit_count as f64;
427        }
428
429        // Fast path: single font family — try primary font first,
430        // fall back to per-char resolution only when the char isn't covered
431        let font_data = if !family.contains(',') {
432            let primary = self.registry.resolve(family, weight, italic);
433            if ch.is_whitespace() || primary.has_char(ch) {
434                primary
435            } else {
436                let (data, _) = self.registry.resolve_for_char(family, ch, weight, italic);
437                data
438            }
439        } else {
440            let (data, _) = self.registry.resolve_for_char(family, ch, weight, italic);
441            data
442        };
443        match font_data {
444            FontData::Standard(std_font) => std_font.metrics().char_width(ch, font_size),
445            FontData::Custom {
446                metrics: Some(m), ..
447            } => m.char_width(ch, font_size),
448            FontData::Custom { metrics: None, .. } => {
449                StandardFont::Helvetica.metrics().char_width(ch, font_size)
450            }
451        }
452    }
453
454    /// Measure the width of a string in points.
455    pub fn measure_string(
456        &self,
457        text: &str,
458        family: &str,
459        weight: u32,
460        italic: bool,
461        font_size: f64,
462        letter_spacing: f64,
463    ) -> f64 {
464        let mut width = 0.0;
465        for ch in text.chars() {
466            width += self.char_width(ch, family, weight, italic, font_size) + letter_spacing;
467        }
468        width
469    }
470
471    /// Resolve a font key to its font data.
472    pub fn resolve(&self, family: &str, weight: u32, italic: bool) -> &FontData {
473        self.registry.resolve(family, weight, italic)
474    }
475
476    /// Access the underlying font registry.
477    pub fn registry(&self) -> &FontRegistry {
478        &self.registry
479    }
480
481    /// Access the underlying font registry mutably.
482    pub fn registry_mut(&mut self) -> &mut FontRegistry {
483        &mut self.registry
484    }
485
486    /// Get the raw font data bytes for a custom font.
487    /// Returns `None` for standard fonts or if the font isn't found.
488    pub fn font_data(&self, family: &str, weight: u32, italic: bool) -> Option<&[u8]> {
489        let font_data = self.registry.resolve(family, weight, italic);
490        match font_data {
491            FontData::Custom { data, .. } => Some(data),
492            FontData::Standard(_) => None,
493        }
494    }
495
496    /// Get the units-per-em for a font. Returns 1000 for standard fonts.
497    pub fn units_per_em(&self, family: &str, weight: u32, italic: bool) -> u16 {
498        let font_data = self.registry.resolve(family, weight, italic);
499        match font_data {
500            FontData::Custom {
501                metrics: Some(m), ..
502            } => m.units_per_em,
503            FontData::Custom { metrics: None, .. } => 1000,
504            FontData::Standard(_) => 1000,
505        }
506    }
507}
508
#[cfg(test)]
mod tests {
    use super::*;

    /// Helvetica's space is 278/1000 em, i.e. 3.336pt at 12pt.
    #[test]
    fn test_font_context_helvetica() {
        let ctx = FontContext::new();
        let w = ctx.char_width(' ', "Helvetica", 400, false, 12.0);
        assert!((w - 3.336).abs() < 0.001);
    }

    #[test]
    fn test_font_context_bold_wider() {
        let ctx = FontContext::new();
        let regular = ctx.char_width('A', "Helvetica", 400, false, 12.0);
        let bold = ctx.char_width('A', "Helvetica", 700, false, 12.0);
        assert!(bold > regular, "Bold A should be wider than regular A");
    }

    #[test]
    fn test_font_context_measure_string() {
        let ctx = FontContext::new();
        let w = ctx.measure_string("Hello", "Helvetica", 400, false, 12.0, 0.0);
        assert!(w > 0.0);
    }

    /// An unknown single family is measured like Helvetica.
    #[test]
    fn test_font_context_fallback() {
        let ctx = FontContext::new();
        let w1 = ctx.char_width('A', "Helvetica", 400, false, 12.0);
        let w2 = ctx.char_width('A', "UnknownFont", 400, false, 12.0);
        assert!((w1 - w2).abs() < 0.001);
    }

    /// Weights of 600 and above snap to the bold (700) variant.
    #[test]
    fn test_font_context_weight_resolution() {
        let ctx = FontContext::new();
        let w700 = ctx.char_width('A', "Helvetica", 700, false, 12.0);
        let w800 = ctx.char_width('A', "Helvetica", 800, false, 12.0);
        assert!((w700 - w800).abs() < 0.001);
    }

    #[test]
    fn test_font_fallback_chain_first_match() {
        let ctx = FontContext::new();
        let w1 = ctx.char_width('A', "Times", 400, false, 12.0);
        let w2 = ctx.char_width('A', "Times, Helvetica", 400, false, 12.0);
        assert!((w1 - w2).abs() < 0.001, "Should use Times (first in chain)");
    }

    #[test]
    fn test_font_fallback_chain_second_match() {
        let ctx = FontContext::new();
        let w1 = ctx.char_width('A', "Helvetica", 400, false, 12.0);
        let w2 = ctx.char_width('A', "Missing, Helvetica", 400, false, 12.0);
        assert!((w1 - w2).abs() < 0.001, "Should fall back to Helvetica");
    }

    #[test]
    fn test_font_fallback_chain_all_missing() {
        let ctx = FontContext::new();
        // When all specified families are missing, resolve_for_char tries
        // builtin Noto Sans first, then Helvetica. 'A' is in Noto Sans,
        // so we get Noto Sans metrics (not Helvetica).
        let w = ctx.char_width('A', "Missing, AlsoMissing", 400, false, 12.0);
        assert!(w > 0.0, "Should still produce a valid width from fallback");
    }

    #[test]
    fn test_font_fallback_chain_quoted_families() {
        let ctx = FontContext::new();
        let w1 = ctx.char_width('A', "Times", 400, false, 12.0);
        let w2 = ctx.char_width('A', "'Times', \"Helvetica\"", 400, false, 12.0);
        assert!((w1 - w2).abs() < 0.001, "Should strip quotes and use Times");
    }

    #[test]
    fn test_builtin_noto_sans_registered() {
        let registry = FontRegistry::new();
        let font = registry.resolve("Noto Sans", 400, false);
        assert!(
            matches!(font, FontData::Custom { .. }),
            "Noto Sans should be registered as a custom font"
        );
        assert!(
            font.has_char('\u{041F}'),
            "Noto Sans should have Cyrillic П"
        );
        assert!(font.has_char('\u{03B1}'), "Noto Sans should have Greek α");
    }

    #[test]
    fn test_builtin_noto_sans_fallback_for_cyrillic() {
        let registry = FontRegistry::new();
        let (font, family) = registry.resolve_for_char("Helvetica", '\u{041F}', 400, false);
        assert_eq!(
            family, "Noto Sans",
            "Cyrillic should fall back to Noto Sans"
        );
        assert!(matches!(font, FontData::Custom { .. }));
    }

    /// A single registered family must be measured with that font's own
    /// metrics. Comparing against the metrics table directly keeps this test
    /// from passing trivially.
    #[test]
    fn test_font_fallback_single_family_unchanged() {
        let ctx = FontContext::new();
        let via_context = ctx.char_width('A', "Courier", 400, false, 12.0);
        let direct = StandardFont::Courier.metrics().char_width('A', 12.0);
        assert!(
            (via_context - direct).abs() < 0.001,
            "Single family should work as before"
        );
    }
}