Skip to main content

azul_layout/
font.rs

1//! Font parsing, metrics extraction, and subsetting.
2//!
3//! This module provides the core font infrastructure for text layout and PDF generation:
4//! - `loading`: System font cache construction and font reload errors
5//! - `mock`: Mock font implementation for testing without real font files
6//! - `parsed`: Full font parsing via allsorts (outlines, metrics, shaping tables, subsetting)
7
8#![cfg(feature = "font_loading")]
9
10use azul_css::{AzString, U8Vec};
11use rust_fontconfig::{FcFontCache, OwnedFontSource};
12
pub mod loading {
    #![cfg(feature = "std")]
    #![cfg(feature = "font_loading")]
    #![cfg_attr(not(feature = "std"), no_std)]
    //! System font cache construction and the error type reported when a
    //! font cannot be (re)loaded.

    use std::io::Error as IoError;

    use azul_css::{AzString, StringVec, U8Vec};
    use rust_fontconfig::FcFontCache;

    /// Builds the system font cache by calling `FcFontCache::build`.
    #[cfg(not(miri))]
    pub fn build_font_cache() -> FcFontCache {
        FcFontCache::build()
    }

    /// Miri build of [`build_font_cache`]: returns `FcFontCache::default()`
    /// instead of calling `FcFontCache::build` (presumably because the real
    /// build touches the filesystem — confirm against `rust_fontconfig`).
    #[cfg(miri)]
    pub fn build_font_cache() -> FcFontCache {
        FcFontCache::default()
    }

    /// Reasons a font could not be (re)loaded.
    #[derive(Debug)]
    pub enum FontReloadError {
        /// An IO error occurred; the string is rendered as the offending path
        /// by the `Display` implementation.
        Io(IoError, AzString),
        /// The requested system font could not be located.
        FontNotFound(AzString),
        /// Font loading was requested but the crate was built without the
        /// `font_loading` feature.
        FontLoadingNotActive(AzString),
    }

    impl Clone for FontReloadError {
        /// `std::io::Error` is not `Clone`, so the `Io` variant is rebuilt
        /// from the original error's kind and its rendered message. The
        /// clone therefore displays the same text as the original instead of
        /// a generic placeholder.
        fn clone(&self) -> Self {
            match self {
                Self::Io(err, path) => {
                    Self::Io(IoError::new(err.kind(), err.to_string()), path.clone())
                }
                Self::FontNotFound(id) => Self::FontNotFound(id.clone()),
                Self::FontLoadingNotActive(id) => Self::FontLoadingNotActive(id.clone()),
            }
        }
    }

    azul_core::impl_display!(FontReloadError, {
        Io(err, path_buf) => format!("Could not load \"{}\" - IO error: {}", path_buf.as_str(), err),
        FontNotFound(id) => format!("Could not locate system font: \"{:?}\"", id),
        FontLoadingNotActive(id) => format!("Could not load system font: \"{:?}\": crate was not compiled with --features=\"font_loading\"", id)
    });
}
57pub mod mock {
58    //! Mock font implementation for testing text layout.
59    //!
60    //! Provides a `MockFont` that simulates font behavior without requiring
61    //! actual font files, useful for unit testing text layout functionality.
62
63    use std::collections::BTreeMap;
64
65    use crate::text3::cache::LayoutFontMetrics;
66
67    /// A mock font implementation for testing text layout without real fonts.
68    ///
69    /// This allows testing text shaping, layout, and rendering code paths
70    /// without needing to load actual TrueType/OpenType font files.
71    #[derive(Debug, Clone)]
72    pub struct MockFont {
73        /// Font metrics (ascent, descent, etc.).
74        pub font_metrics: LayoutFontMetrics,
75        /// Width of the space character in font units.
76        pub space_width: Option<usize>,
77        /// Horizontal advance widths keyed by glyph ID.
78        pub glyph_advances: BTreeMap<u16, u16>,
79        /// Glyph bounding box sizes (width, height) keyed by glyph ID.
80        pub glyph_sizes: BTreeMap<u16, (i32, i32)>,
81        /// Unicode codepoint to glyph ID mapping.
82        pub glyph_indices: BTreeMap<u32, u16>,
83    }
84
85    impl MockFont {
86        /// Creates a new `MockFont` with the given font metrics.
87        pub fn new(font_metrics: LayoutFontMetrics) -> Self {
88            MockFont {
89                font_metrics,
90                space_width: Some(10),
91                glyph_advances: BTreeMap::new(),
92                glyph_sizes: BTreeMap::new(),
93                glyph_indices: BTreeMap::new(),
94            }
95        }
96
97        /// Sets the space character width.
98        pub fn with_space_width(mut self, width: usize) -> Self {
99            self.space_width = Some(width);
100            self
101        }
102
103        /// Adds a horizontal advance value for a glyph.
104        pub fn with_glyph_advance(mut self, glyph_index: u16, advance: u16) -> Self {
105            self.glyph_advances.insert(glyph_index, advance);
106            self
107        }
108
109        /// Adds a bounding box size for a glyph.
110        pub fn with_glyph_size(mut self, glyph_index: u16, size: (i32, i32)) -> Self {
111            self.glyph_sizes.insert(glyph_index, size);
112            self
113        }
114
115        /// Adds a Unicode codepoint to glyph ID mapping.
116        pub fn with_glyph_index(mut self, unicode: u32, index: u16) -> Self {
117            self.glyph_indices.insert(unicode, index);
118            self
119        }
120    }
121}
122
123pub mod parsed {
124    use core::fmt;
125    use std::{collections::BTreeMap, sync::Arc};
126
127    use allsorts::{
128        binary::read::ReadScope,
129        font_data::FontData,
130        layout::{GDEFTable, LayoutCache, LayoutCacheData, GPOS, GSUB},
131        outline::{OutlineBuilder, OutlineSink},
132        pathfinder_geometry::{line_segment::LineSegment2F, vector::Vector2F},
133        subset::{subset as allsorts_subset, whole_font, CmapTarget, SubsetProfile},
134        tables::{
135            cmap::owned::CmapSubtable as OwnedCmapSubtable,
136            glyf::{
137                Glyph, GlyfVisitorContext, LocaGlyf, Point,
138                VariableGlyfContext, VariableGlyfContextStore,
139            },
140            kern::owned::KernTable,
141            FontTableProvider, HheaTable, MaxpTable,
142        },
143        tag,
144    };
145    use azul_core::resources::{
146        GlyphOutline, GlyphOutlineOperation, OutlineCubicTo, OutlineLineTo, OutlineMoveTo,
147        OutlineQuadTo, OwnedGlyphBoundingBox,
148    };
149    use azul_css::props::basic::FontMetrics as CssFontMetrics;
150
151    // Mock font module for testing
152    pub use crate::font::mock::MockFont;
153    use crate::text3::cache::LayoutFontMetrics;
154
155    /// Cached GSUB table for glyph substitution operations.
156    pub type GsubCache = Arc<LayoutCacheData<GSUB>>;
157    /// Cached GPOS table for glyph positioning operations.
158    pub type GposCache = Arc<LayoutCacheData<GPOS>>;
159
160    /// Monotonic-clock nanos since process start. Used to timestamp
161    /// `ParsedFont.last_used` for LRU eviction. Cheap (single
162    /// `Instant::now`); resolution is plenty fine for "did this
163    /// face get touched in the last N seconds" decisions. Exposed
164    /// `pub(crate)` so `FontManager::evict_unused` reads from the
165    /// same clock as `last_used` writes.
166    pub(crate) fn monotonic_now_nanos() -> u64 {
167        // Safe: `Instant::elapsed` against the same launch instant is
168        // monotonic and never overflows in any realistic process
169        // lifetime (>500 years).
170        use std::sync::OnceLock;
171        use std::time::Instant;
172        static LAUNCH: OnceLock<Instant> = OnceLock::new();
173        let start = LAUNCH.get_or_init(Instant::now);
174        start.elapsed().as_nanos() as u64
175    }
176
177    /// Glyph-outline decoder state. See the
178    /// [`ParsedFont::loca_glyf`] field docs for the full description.
179    #[derive(Clone)]
180    pub(crate) enum LocaGlyfState {
181        /// Ready to decode immediately, or known to have no outline
182        /// data. `None` covers both CFF fonts and fonts where the
183        /// loca+glyf parse failed.
184        ///
185        /// This variant *cannot* be evicted by
186        /// [`crate::text3::cache::FontManager::evict_unused`]: there
187        /// are no source bytes retained to re-decode from. The eager
188        /// `from_bytes` path (tests, `with_source_bytes` PDF callers)
189        /// produces this variant.
190        Loaded(Option<Arc<std::sync::Mutex<LocaGlyf>>>),
191        /// Font bytes retained for lazy `LocaGlyf` construction.
192        ///
193        /// `loaded` is `Mutex<Option<…>>` (not `OnceLock`) so an
194        /// idle eviction can clear it back to `None`; the next
195        /// `get_or_decode_glyph` will re-parse from `bytes`. Two-step
196        /// double-check pattern in `resolve_loca_glyf` keeps the
197        /// expensive `LocaGlyf::load` outside the critical section.
198        Deferred {
199            bytes: Arc<rust_fontconfig::FontBytes>,
200            font_index: usize,
201            loaded: Arc<std::sync::Mutex<Option<Arc<std::sync::Mutex<LocaGlyf>>>>>,
202        },
203    }
204
205    /// Adapter that collects allsorts outline commands into our `GlyphOutline` format.
206    ///
207    /// Implements `OutlineSink` so it can be passed to `GlyfVisitorContext::visit()`.
208    /// This handles composite glyph resolution, transforms, and variable font
209    /// deltas automatically via allsorts internals.
210    struct GlyphOutlineCollector {
211        contours: Vec<GlyphOutline>,
212        current_contour: Vec<GlyphOutlineOperation>,
213    }
214
215    impl GlyphOutlineCollector {
216        fn new() -> Self {
217            Self {
218                contours: Vec::new(),
219                current_contour: Vec::new(),
220            }
221        }
222
223        fn into_outlines(mut self) -> Vec<GlyphOutline> {
224            if !self.current_contour.is_empty() {
225                self.contours.push(GlyphOutline {
226                    operations: std::mem::take(&mut self.current_contour).into(),
227                });
228            }
229            self.contours
230        }
231    }
232
233    impl OutlineSink for GlyphOutlineCollector {
234        fn move_to(&mut self, to: Vector2F) {
235            if !self.current_contour.is_empty() {
236                self.contours.push(GlyphOutline {
237                    operations: std::mem::take(&mut self.current_contour).into(),
238                });
239            }
240            self.current_contour.push(GlyphOutlineOperation::MoveTo(OutlineMoveTo {
241                x: to.x() as i16,
242                y: to.y() as i16,
243            }));
244        }
245
246        fn line_to(&mut self, to: Vector2F) {
247            self.current_contour.push(GlyphOutlineOperation::LineTo(OutlineLineTo {
248                x: to.x() as i16,
249                y: to.y() as i16,
250            }));
251        }
252
253        fn quadratic_curve_to(&mut self, ctrl: Vector2F, to: Vector2F) {
254            self.current_contour.push(GlyphOutlineOperation::QuadraticCurveTo(
255                OutlineQuadTo {
256                    ctrl_1_x: ctrl.x() as i16,
257                    ctrl_1_y: ctrl.y() as i16,
258                    end_x: to.x() as i16,
259                    end_y: to.y() as i16,
260                },
261            ));
262        }
263
264        fn cubic_curve_to(&mut self, ctrl: LineSegment2F, to: Vector2F) {
265            self.current_contour.push(GlyphOutlineOperation::CubicCurveTo(
266                OutlineCubicTo {
267                    ctrl_1_x: ctrl.from_x() as i16,
268                    ctrl_1_y: ctrl.from_y() as i16,
269                    ctrl_2_x: ctrl.to_x() as i16,
270                    ctrl_2_y: ctrl.to_y() as i16,
271                    end_x: to.x() as i16,
272                    end_y: to.y() as i16,
273                },
274            ));
275        }
276
277        fn close(&mut self) {
278            self.current_contour.push(GlyphOutlineOperation::ClosePath);
279            self.contours.push(GlyphOutline {
280                operations: std::mem::take(&mut self.current_contour).into(),
281            });
282        }
283    }
284
285    /// Parsed font data with all required tables for text layout and PDF generation.
286    ///
287    /// This struct holds the parsed representation of a TrueType/OpenType font,
288    /// including glyph outlines, metrics, and shaping tables. It's used for:
289    /// - Text layout (via GSUB/GPOS tables)
290    /// - Glyph rendering (via glyf/CFF outlines)
291    /// - PDF font embedding (via font metrics and subsetting)
292    pub struct ParsedFont {
293        /// Hash of the font bytes for caching and equality checks.
294        pub hash: u64,
295        /// Layout-specific font metrics (ascent, descent, line gap).
296        pub font_metrics: LayoutFontMetrics,
297        /// PDF-specific detailed font metrics from HEAD, HHEA, OS/2 tables.
298        pub pdf_font_metrics: PdfFontMetrics,
299        /// Total number of glyphs in the font (from maxp table).
300        pub num_glyphs: u16,
301        /// Horizontal header table (hhea) containing global horizontal metrics.
302        pub hhea_table: HheaTable,
303        /// Offset+length into original_bytes for hmtx table (lazy: no copy).
304        pub hmtx_range: (usize, usize),
305        /// Offset+length into original_bytes for vmtx table (lazy: no copy).
306        pub vmtx_range: (usize, usize),
307        /// Vertical header table (vhea), same format as hhea. None if font has no vertical metrics.
308        pub vhea_table: Option<HheaTable>,
309        /// Maximum profile table (maxp) containing glyph count and memory hints.
310        pub maxp_table: MaxpTable,
311        /// Raw GSUB table bytes, kept as a `Vec<u8>` (tens to low-hundreds
312        /// of KiB) so the parsed `GsubCache` can be built on first shape
313        /// call instead of up-front. Access via [`ParsedFont::gsub`] —
314        /// that getter populates `gsub_cache_lazy` via `OnceLock` and
315        /// returns a borrow.
316        pub(crate) gsub_bytes: Option<Vec<u8>>,
317        /// Lazy GSUB cache: populated on first [`ParsedFont::gsub`] call.
318        /// `None` means "font has no GSUB table" *after* init attempt;
319        /// the `OnceLock` wrapper distinguishes "not yet initialised"
320        /// from "initialised to None".
321        pub(crate) gsub_cache_lazy: std::sync::OnceLock<Option<GsubCache>>,
322        /// Raw GPOS table bytes. Same lazy-parse arrangement as
323        /// `gsub_bytes` — see [`ParsedFont::gpos`].
324        pub(crate) gpos_bytes: Option<Vec<u8>>,
325        /// Lazy GPOS cache, populated on first [`ParsedFont::gpos`] call.
326        pub(crate) gpos_cache_lazy: std::sync::OnceLock<Option<GposCache>>,
327        /// Glyph definition table (GDEF) for glyph classification.
328        pub opt_gdef_table: Option<Arc<GDEFTable>>,
329        /// Legacy kerning table (kern) for fonts without GPOS.
330        pub opt_kern_table: Option<Arc<KernTable>>,
331        /// Monotonic-clock nanos at the most recent
332        /// [`ParsedFont::get_or_decode_glyph`] / `gsub()` / `gpos()`
333        /// call. `0` means "never touched". Used by
334        /// [`crate::text3::cache::FontManager::evict_unused`] to
335        /// decide which `LocaGlyfState::Deferred` faces to release.
336        pub(crate) last_used: Arc<std::sync::atomic::AtomicU64>,
337        /// `true` if this font is a variable font (carries a `gvar`
338        /// table). Cached at parse time so [`decode_glyph_inner`]
339        /// can short-circuit the variable-context construction for
340        /// the common non-variable case. Variable-glyph delta
341        /// application requires the source bytes to be retained,
342        /// so it only fires on the `LocaGlyfState::Deferred` path.
343        pub(crate) is_variable_font: bool,
344        /// Lazy outline cache. Populated on first
345        /// [`ParsedFont::get_or_decode_glyph`] call per `gid`; entries
346        /// are wrapped in `Arc` so callers can hold them without
347        /// keeping the lock. The space glyph (and `.notdef` when
348        /// present) are pre-inserted by `from_bytes_internal` so the
349        /// shaper's cmap-miss path has something to render without
350        /// racing with a decode.
351        ///
352        /// Tests that previously walked the public `glyph_records_decoded`
353        /// `BTreeMap` field now call
354        /// [`ParsedFont::prime_glyph_cache`] (decodes every glyph into
355        /// this cache) followed by
356        /// [`ParsedFont::for_each_decoded_glyph`] /
357        /// [`ParsedFont::glyph_cache_snapshot`] to walk the result.
358        pub(crate) glyph_cache: Arc<std::sync::RwLock<BTreeMap<u16, Arc<OwnedGlyph>>>>,
359        /// Glyph outline decoder state.
360        ///
361        /// - `Loaded(Some(arc))`: `LocaGlyf` is already loaded (owning
362        ///   its own `Box<[u8]>` copy of the loca+glyf tables) and
363        ///   ready to decode glyphs. Produced by the eager `from_bytes`
364        ///   constructor path (tests).
365        /// - `Loaded(None)`: the font has no usable loca+glyf (CFF, or
366        ///   a parse failure). Glyph outlines won't decode; the hmtx
367        ///   advance fallback fills in the blanks.
368        /// - `Deferred`: we retain an `Arc<[u8]>` to the full font file
369        ///   and the `font_index`; the first `get_or_decode_glyph` call
370        ///   parses a fresh `FontData` / `TableProvider` from those
371        ///   bytes and loads `LocaGlyf`, storing the result in the
372        ///   `OnceLock`. Fonts that get resolved into a chain but are
373        ///   never actually rasterized pay zero decode cost — this is
374        ///   the big win for pages like `excel.html` where 20+ fallback
375        ///   faces load but only a handful are touched.
376        pub(crate) loca_glyf: LocaGlyfState,
377        /// Cached width of the space character in font units.
378        pub space_width: Option<usize>,
379        /// Character-to-glyph mapping (cmap subtable).
380        pub cmap_subtable: Option<OwnedCmapSubtable>,
381        /// Mock font data for testing (replaces real font behavior).
382        pub mock: Option<Box<MockFont>>,
383        /// Reverse mapping: glyph_id -> cluster text (handles ligatures like "fi").
384        pub reverse_glyph_cache: std::collections::BTreeMap<u16, String>,
385        /// Original font bytes — only retained for callers that need to
386        /// reconstruct or subset the font (PDF export). Layout / shaping /
387        /// raster never read this, so `ParsedFont::from_bytes` leaves it
388        /// as `None` by default and callers opt in via
389        /// [`ParsedFont::with_source_bytes`]. Shared across faces of the
390        /// same `.ttc` via the `Arc<FontBytes>` that
391        /// [`rust_fontconfig::FcFontCache::get_font_bytes`] returns —
392        /// for disk fonts the backing is an mmap so untouched pages
393        /// don't count toward RSS.
394        pub original_bytes: Option<std::sync::Arc<rust_fontconfig::FontBytes>>,
395        /// Font index within collection (0 for single-font files).
396        pub original_index: usize,
397        /// GID to CID mapping for CFF fonts (required for PDF embedding).
398        pub index_to_cid: BTreeMap<u16, u16>,
399        /// Font type (TrueType outlines or OpenType CFF).
400        pub font_type: FontType,
401        /// PostScript font name from the NAME table.
402        pub font_name: Option<String>,
403        /// TrueType bytecode hinting instance (mutable interpreter state).
404        /// Wrapped in Mutex because hinting mutates internal state.
405        /// None for CFF fonts or fonts without hinting data.
406        pub hint_instance: Option<std::sync::Mutex<allsorts::hinting::HintInstance>>,
407    }
408
409    impl Clone for ParsedFont {
410        fn clone(&self) -> Self {
411            ParsedFont {
412                hash: self.hash,
413                font_metrics: self.font_metrics.clone(),
414                pdf_font_metrics: self.pdf_font_metrics,
415                num_glyphs: self.num_glyphs,
416                hhea_table: self.hhea_table.clone(),
417                hmtx_range: self.hmtx_range,
418                vmtx_range: self.vmtx_range,
419                vhea_table: self.vhea_table.clone(),
420                maxp_table: self.maxp_table.clone(),
421                // OnceLock<T: Clone>: Clone preserves the init state, so
422                // a clone of a parsed cache skips re-parse on first
423                // access. The raw bytes we keep around for lazy init
424                // are cloned too.
425                gsub_bytes: self.gsub_bytes.clone(),
426                gsub_cache_lazy: self.gsub_cache_lazy.clone(),
427                gpos_bytes: self.gpos_bytes.clone(),
428                gpos_cache_lazy: self.gpos_cache_lazy.clone(),
429                opt_gdef_table: self.opt_gdef_table.clone(),
430                opt_kern_table: self.opt_kern_table.clone(),
431                // Share the lazy cache and loca_glyf across clones: cheap
432                // Arc bump, amortises glyph decode across clones of the
433                // same face.
434                last_used: Arc::clone(&self.last_used),
435                is_variable_font: self.is_variable_font,
436                glyph_cache: Arc::clone(&self.glyph_cache),
437                // `LocaGlyfState` is `Clone` — for `Loaded` this is an
438                // `Arc::clone`; for `Deferred` it's an `Arc::clone` of
439                // the bytes + the `OnceLock`, so a clone of a face
440                // that's already decoded glyphs carries the decode.
441                loca_glyf: self.loca_glyf.clone(),
442                space_width: self.space_width,
443                cmap_subtable: self.cmap_subtable.clone(),
444                mock: self.mock.clone(),
445                reverse_glyph_cache: self.reverse_glyph_cache.clone(),
446                // Arc clone — O(1), just bumps refcount; no byte copy.
447                original_bytes: self.original_bytes.clone(),
448                original_index: self.original_index,
449                index_to_cid: self.index_to_cid.clone(),
450                font_type: self.font_type.clone(),
451                font_name: self.font_name.clone(),
452                // HintInstance has mutable interpreter state and is not Clone.
453                // Clones are used for PDF/serialization where hinting isn't needed.
454                hint_instance: None,
455            }
456        }
457    }
458
459    /// Distinguishes TrueType fonts from OpenType CFF fonts.
460    ///
461    /// This affects how glyph outlines are extracted and how the font
462    /// is embedded in PDF documents.
463    #[derive(Debug, Clone, PartialEq)]
464    pub enum FontType {
465        /// TrueType font with quadratic Bézier outlines in glyf table.
466        TrueType,
467        /// OpenType font with cubic Bézier outlines in CFF table.
468        /// Contains the serialized CFF data for PDF embedding.
469        OpenTypeCFF(Vec<u8>),
470    }
471
472    /// PDF-specific font metrics from HEAD, HHEA, and OS/2 tables.
473    ///
474    /// These metrics are used for PDF font descriptors and accurate
475    /// text positioning in generated PDF documents.
476    #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
477    #[repr(C)]
478    pub struct PdfFontMetrics {
479        // -- HEAD table fields --
480        /// Font units per em-square (typically 1000 or 2048).
481        pub units_per_em: u16,
482        /// Font flags (italic, bold, fixed-pitch, etc.).
483        pub font_flags: u16,
484        /// Minimum x-coordinate across all glyphs.
485        pub x_min: i16,
486        /// Minimum y-coordinate across all glyphs.
487        pub y_min: i16,
488        /// Maximum x-coordinate across all glyphs.
489        pub x_max: i16,
490        /// Maximum y-coordinate across all glyphs.
491        pub y_max: i16,
492
493        // -- HHEA table fields --
494        /// Typographic ascender (distance above baseline).
495        pub ascender: i16,
496        /// Typographic descender (distance below baseline, usually negative).
497        pub descender: i16,
498        /// Recommended line gap between lines of text.
499        pub line_gap: i16,
500        /// Maximum horizontal advance width across all glyphs.
501        pub advance_width_max: u16,
502        /// Caret slope rise for italic angle calculation.
503        pub caret_slope_rise: i16,
504        /// Caret slope run for italic angle calculation.
505        pub caret_slope_run: i16,
506
507        // -- OS/2 table fields (0 if table not present) --
508        /// Average width of lowercase letters.
509        pub x_avg_char_width: i16,
510        /// Visual weight class (100-900, 400=normal, 700=bold).
511        pub us_weight_class: u16,
512        /// Visual width class (1-9, 5=normal).
513        pub us_width_class: u16,
514        /// Thickness of strikeout stroke in font units.
515        pub y_strikeout_size: i16,
516        /// Vertical position of strikeout stroke.
517        pub y_strikeout_position: i16,
518    }
519
520    impl Default for PdfFontMetrics {
521        fn default() -> Self {
522            PdfFontMetrics::zero()
523        }
524    }
525
526    impl PdfFontMetrics {
527        /// Returns zeroed metrics with `units_per_em` set to 1000 (standard PostScript default)
528        /// to avoid division-by-zero in scaling calculations.
529        pub const fn zero() -> Self {
530            PdfFontMetrics {
531                units_per_em: 1000,
532                font_flags: 0,
533                x_min: 0,
534                y_min: 0,
535                x_max: 0,
536                y_max: 0,
537                ascender: 0,
538                descender: 0,
539                line_gap: 0,
540                advance_width_max: 0,
541                caret_slope_rise: 0,
542                caret_slope_run: 0,
543                x_avg_char_width: 0,
544                us_weight_class: 0,
545                us_width_class: 0,
546                y_strikeout_size: 0,
547                y_strikeout_position: 0,
548            }
549        }
550    }
551
552    /// Result of font subsetting operation.
553    ///
554    /// Contains the subsetted font bytes and a mapping from original
555    /// glyph IDs to new glyph IDs in the subset.
556    #[derive(Debug, Clone)]
557    pub struct SubsetFont {
558        /// The subsetted font file bytes (smaller than original).
559        pub bytes: Vec<u8>,
560        /// Mapping: original glyph ID -> (new subset glyph ID, source character).
561        pub glyph_mapping: BTreeMap<u16, (u16, char)>,
562    }
563
564    impl SubsetFont {
565        /// Return the changed text so that when rendering with the subset font (instead of the
566        /// original) the renderer will end up at the same glyph IDs as if we used the original text
567        /// on the original font
568        pub fn subset_text(&self, text: &str) -> String {
569            text.chars()
570                .filter_map(|c| {
571                    self.glyph_mapping.values().find_map(|(ngid, ch)| {
572                        if *ch == c {
573                            char::from_u32(*ngid as u32)
574                        } else {
575                            None
576                        }
577                    })
578                })
579                .collect()
580        }
581    }
582
583    /// Hash-based equality: two fonts are considered equal if their content hash matches.
584    /// This is a performance optimization — hash collisions are possible but vanishingly
585    /// unlikely (~1/2^64).
586    impl PartialEq for ParsedFont {
587        fn eq(&self, other: &Self) -> bool {
588            self.hash == other.hash
589        }
590    }
591
592    impl Eq for ParsedFont {}
593
594    const FONT_B64_START: &str = "data:font/ttf;base64,";
595
596    impl serde::Serialize for ParsedFont {
597        fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
598            use base64::Engine;
599            let s = format!(
600                "{FONT_B64_START}{}",
601                base64::prelude::BASE64_STANDARD.encode(&self.to_bytes(None).unwrap_or_default())
602            );
603            s.serialize(serializer)
604        }
605    }
606
607    impl<'de> serde::Deserialize<'de> for ParsedFont {
608        fn deserialize<D: serde::Deserializer<'de>>(
609            deserializer: D,
610        ) -> Result<ParsedFont, D::Error> {
611            use base64::Engine;
612            let s = String::deserialize(deserializer)?;
613            let b64 = if s.starts_with(FONT_B64_START) {
614                let b = &s[FONT_B64_START.len()..];
615                base64::prelude::BASE64_STANDARD.decode(&b).ok()
616            } else {
617                None
618            };
619
620            let mut warnings = Vec::new();
621            ParsedFont::from_bytes(&b64.unwrap_or_default(), 0, &mut warnings).ok_or_else(|| {
622                serde::de::Error::custom(format!("Font deserialization error: {warnings:?}"))
623            })
624        }
625    }
626
627    impl fmt::Debug for ParsedFont {
628        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
629            f.debug_struct("ParsedFont")
630                .field("hash", &self.hash)
631                .field("font_metrics", &self.font_metrics)
632                .field("num_glyphs", &self.num_glyphs)
633                .field("hhea_table", &self.hhea_table)
634                .field(
635                    "hmtx_range",
636                    &format_args!("<{} bytes>", self.hmtx_range.1),
637                )
638                .field("maxp_table", &self.maxp_table)
639                .field(
640                    "glyph_cache",
641                    &format_args!(
642                        "{} entries (lazy)",
643                        self.glyph_cache.read().map(|m| m.len()).unwrap_or(0),
644                    ),
645                )
646                .field("space_width", &self.space_width)
647                .field("cmap_subtable", &self.cmap_subtable)
648                .finish()
649        }
650    }
651
652    /// Warning or error message generated during font parsing.
653    #[derive(Debug, Clone, PartialEq, Eq)]
654    pub struct FontParseWarning {
655        /// Severity level of this warning.
656        pub severity: FontParseWarningSeverity,
657        /// Human-readable description of the issue.
658        pub message: String,
659    }
660
661    /// Severity level for font parsing warnings.
662    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
663    pub enum FontParseWarningSeverity {
664        /// Informational message (not an error).
665        Info,
666        /// Warning that may affect font rendering.
667        Warning,
668        /// Error that prevents proper font usage.
669        Error,
670    }
671
672    impl FontParseWarning {
673        /// Creates an info-level message.
674        pub fn info(message: String) -> Self {
675            Self {
676                severity: FontParseWarningSeverity::Info,
677                message,
678            }
679        }
680
681        /// Creates a warning-level message.
682        pub fn warning(message: String) -> Self {
683            Self {
684                severity: FontParseWarningSeverity::Warning,
685                message,
686            }
687        }
688
689        /// Creates an error-level message.
690        pub fn error(message: String) -> Self {
691            Self {
692                severity: FontParseWarningSeverity::Error,
693                message,
694            }
695        }
696    }
697
698    impl ParsedFont {
699        /// Parse a font from bytes using allsorts
700        ///
701        /// # Arguments
702        /// * `font_bytes` - The font file data
703        /// * `font_index` - Index of the font in a font collection (0 for single fonts)
704        /// * `warnings` - Optional vector to collect parsing warnings
705        ///
706        /// # Returns
707        /// `Some(ParsedFont)` if parsing succeeds, `None` otherwise
708        ///
709        /// Note: Outlines are decoded lazily by `get_or_decode_glyph`;
710        /// `LocaGlyf::load` runs eagerly here. Use `from_bytes_shared`
711        /// for the lazy-LocaGlyf production path.
712        pub fn from_bytes(
713            font_bytes: &[u8],
714            font_index: usize,
715            warnings: &mut Vec<FontParseWarning>,
716        ) -> Option<Self> {
717            // `from_bytes` keeps the eager-LocaGlyf behaviour for the
718            // small number of callers (mainly tests) that don't have
719            // an `Arc<[u8]>` to keep alive for the lazy path.
720            Self::from_bytes_internal(font_bytes, font_index, warnings, false)
721        }
722
723        /// Shared implementation of `from_bytes` / `from_bytes_shared`.
724        ///
725        /// `defer_loca_glyf = true` skips the `LocaGlyf::load` call
726        /// here so the caller (`from_bytes_shared`) can install a
727        /// `LocaGlyfState::Deferred` slot that re-parses on first
728        /// glyph decode. Saves the load-then-drop cycle the previous
729        /// arrangement paid (`from_bytes_shared` used to call
730        /// `from_bytes` and immediately replace the loaded LocaGlyf
731        /// with a Deferred slot, throwing away ~hundreds of KiB of
732        /// loca+glyf bytes per face for fonts in the chain that get
733        /// loaded but never rasterized).
734        fn from_bytes_internal(
735            font_bytes: &[u8],
736            font_index: usize,
737            warnings: &mut Vec<FontParseWarning>,
738            defer_loca_glyf: bool,
739        ) -> Option<Self> {
740            use std::{
741                collections::hash_map::DefaultHasher,
742                hash::{Hash, Hasher},
743            };
744
745            use allsorts::{
746                binary::read::ReadScope,
747                font_data::FontData,
748                tables::{
749                    cmap::{owned::CmapSubtable as OwnedCmapSubtable, CmapSubtable},
750                    FontTableProvider, HeadTable, HheaTable, MaxpTable,
751                },
752                tag,
753            };
754
755            let scope = ReadScope::new(font_bytes);
756            let font_file = match scope.read::<FontData<'_>>() {
757                Ok(ff) => ff,
758                Err(e) => {
759                    warnings.push(FontParseWarning::error(format!(
760                        "Failed to read font data: {}",
761                        e
762                    )));
763                    return None;
764                }
765            };
766            let provider = match font_file.table_provider(font_index) {
767                Ok(p) => p,
768                Err(e) => {
769                    warnings.push(FontParseWarning::error(format!(
770                        "Failed to get table provider for font index {}: {}",
771                        font_index, e
772                    )));
773                    return None;
774                }
775            };
776
777            // Extract font name from NAME table early (before provider is moved)
778            let font_name = provider.table_data(tag::NAME).ok().and_then(|name_data| {
779                ReadScope::new(&name_data?)
780                    .read::<allsorts::tables::NameTable>()
781                    .ok()
782                    .and_then(|name_table| {
783                        name_table.string_for_id(allsorts::tables::NameTable::POSTSCRIPT_NAME)
784                    })
785            });
786
787            let head_table = provider
788                .table_data(tag::HEAD)
789                .ok()
790                .and_then(|head_data| ReadScope::new(&head_data?).read::<HeadTable>().ok())?;
791
792            let maxp_table = provider
793                .table_data(tag::MAXP)
794                .ok()
795                .and_then(|maxp_data| ReadScope::new(&maxp_data?).read::<MaxpTable>().ok())
796                .unwrap_or(MaxpTable {
797                    num_glyphs: 0,
798                    version1_sub_table: None,
799                });
800
801            let num_glyphs = maxp_table.num_glyphs as usize;
802
803            // Compute byte offset+length into font_bytes for hmtx/vmtx
804            // instead of copying the table data. The provider returns a
805            // borrowed slice for OpenType fonts, so we can derive the
806            // offset via pointer arithmetic.
807            let hmtx_range = provider
808                .table_data(tag::HMTX)
809                .ok()
810                .and_then(|cow_opt| {
811                    let cow = cow_opt?;
812                    match cow {
813                        std::borrow::Cow::Borrowed(slice) => {
814                            let base = font_bytes.as_ptr() as usize;
815                            let ptr = slice.as_ptr() as usize;
816                            let offset = ptr.checked_sub(base)?;
817                            if offset + slice.len() <= font_bytes.len() {
818                                Some((offset, slice.len()))
819                            } else {
820                                None
821                            }
822                        }
823                        std::borrow::Cow::Owned(_) => None,
824                    }
825                })
826                .unwrap_or((0, 0));
827
828            let vmtx_range = provider
829                .table_data(tag::VMTX)
830                .ok()
831                .and_then(|s| {
832                    let slice = s?;
833                    let base = font_bytes.as_ptr() as usize;
834                    let ptr = slice.as_ptr() as usize;
835                    let offset = ptr.checked_sub(base)?;
836                    if offset + slice.len() <= font_bytes.len() {
837                        Some((offset, slice.len()))
838                    } else {
839                        None
840                    }
841                })
842                .unwrap_or((0, 0));
843
844            // Parse vhea table (same format as hhea, used for vertical metrics)
845            let vhea_table = provider
846                .table_data(tag::VHEA)
847                .ok()
848                .and_then(|vhea_data| ReadScope::new(&vhea_data?).read::<HheaTable>().ok());
849
850            // hhea is required per the OpenType spec; return None if missing
851            let hhea_table = provider
852                .table_data(tag::HHEA)
853                .ok()
854                .and_then(|hhea_data| ReadScope::new(&hhea_data?).read::<HheaTable>().ok())?;
855
856            // Build layout-specific font metrics
857            let font_metrics = LayoutFontMetrics {
858                units_per_em: if head_table.units_per_em == 0 {
859                    1000
860                } else {
861                    head_table.units_per_em
862                },
863                ascent: hhea_table.ascender as f32,
864                descent: hhea_table.descender as f32,
865                line_gap: hhea_table.line_gap as f32,
866                x_height: None, // will be populated from OS/2 table via from_font_metrics if available
867                cap_height: None,
868            };
869
870            // Build PDF-specific font metrics
871            let pdf_font_metrics =
872                Self::parse_pdf_font_metrics(font_bytes, font_index, &head_table, &hhea_table);
873
874            // Use allsorts LocaGlyf for on-demand outline extraction. We
875            // *load* LocaGlyf eagerly (it owns ~tens of KiB of loca +
876            // ~hundreds of KiB of glyf bytes) but we *don't* decode any
877            // glyph outlines up front — that's the big RSS win. Glyphs
878            // are decoded by `ParsedFont::get_or_decode_glyph` on first
879            // access from the CPU/GPU rasterizer.
880            //
881            // When `defer_loca_glyf` is set (production lazy path via
882            // `from_bytes_shared`), we skip `LocaGlyf::load` here too —
883            // the caller will overwrite the slot with
884            // `LocaGlyfState::Deferred` carrying the source bytes
885            // `Arc<[u8]>`, and the load happens on the first
886            // `get_or_decode_glyph` call. This avoids parsing
887            // ~hundreds of KiB per face for fonts that get resolved
888            // into a chain but never actually rasterized (typical
889            // for fallback fonts in CSS chains).
890            let has_glyf = provider.has_table(tag::GLYF) && provider.has_table(tag::LOCA);
891            // Cache `has_gvar` before `provider` gets moved into
892            // `allsorts::font::Font::new(provider)` further down —
893            // it's the cheapest way to detect a variable font and
894            // avoids the borrow-after-move that a later
895            // `provider.has_table(tag::GVAR)` would incur.
896            let has_gvar = provider.has_table(tag::GVAR);
897            let loca_glyf_opt: Option<Arc<std::sync::Mutex<LocaGlyf>>> = if has_glyf
898                && !defer_loca_glyf
899            {
900                match LocaGlyf::load(&provider) {
901                    Ok(lg) => Some(Arc::new(std::sync::Mutex::new(lg))),
902                    Err(e) => {
903                        warnings.push(FontParseWarning::warning(format!(
904                            "Failed to load LocaGlyf: {} — falling back to hmtx-only", e
905                        )));
906                        None
907                    }
908                }
909            } else {
910                None
911            };
912
913            // Lazy `glyph_cache` starts empty; the space-glyph stub
914            // below pre-inserts gid 0 / space so the shaper's
915            // cmap-miss fallback has something to render without
916            // racing with a decode.
917
918            let mut font_data_impl = allsorts::font::Font::new(provider).ok()?;
919
920            // Create TrueType hinting instance from font tables
921            let hint_instance = allsorts::hinting::HintInstance::new(
922                &font_data_impl.font_table_provider
923            ).ok().flatten().map(|h| std::sync::Mutex::new(h));
924
925            // Stash raw GSUB/GPOS bytes for lazy parse. Typical fonts
926            // have ~tens of KiB of GSUB + a few-to-tens of KiB of GPOS —
927            // dwarfed by glyph outlines — so we keep the bytes around
928            // and only spend `LayoutTable::read` + `new_layout_cache`
929            // cycles when the shaper actually needs them (via
930            // `ParsedFont::gsub` / `::gpos`). For an ASCII run where no
931            // substitution / kerning is required, we skip both entirely.
932            let gsub_bytes = font_data_impl
933                .font_table_provider
934                .table_data(tag::GSUB)
935                .ok()
936                .flatten()
937                .map(|c| c.into_owned());
938            let gpos_bytes = font_data_impl
939                .font_table_provider
940                .table_data(tag::GPOS)
941                .ok()
942                .flatten()
943                .map(|c| c.into_owned());
944            let opt_gdef_table = font_data_impl.gdef_table().ok().and_then(|o| o);
945            let num_glyphs = font_data_impl.num_glyphs();
946
947            let opt_kern_table = font_data_impl
948                .kern_table()
949                .ok()
950                .and_then(|s| Some(s?.to_owned()));
951
952            let cmap_data = font_data_impl.cmap_subtable_data();
953            let cmap_subtable = ReadScope::new(cmap_data);
954            let cmap_subtable = cmap_subtable
955                .read::<CmapSubtable<'_>>()
956                .ok()
957                .and_then(|s| s.to_owned());
958
959            // Font identity hash — used by `PartialEq` for ParsedFont.
960            //
961            // Previously we did `font_bytes.hash(&mut hasher)` over
962            // the full mmap. That touched every page of the file
963            // (a 40 MiB `.ttc` walked byte-for-byte) so the "lazy
964            // mmap" ended up *fully resident* the moment we built
965            // a `ParsedFont`. Cold RSS jumped ~40 MiB from this
966            // single line.
967            //
968            // The hash doesn't need to be cryptographic — it just
969            // has to disambiguate two `ParsedFont`s. `(len, first
970            // 4 KiB, last 4 KiB, font_index)` is plenty unique and
971            // only faults in the two header / trailer pages, which
972            // shaping is going to need anyway.
973            let mut hasher = DefaultHasher::new();
974            (font_bytes.len() as u64).hash(&mut hasher);
975            let head_len = font_bytes.len().min(4096);
976            font_bytes[..head_len].hash(&mut hasher);
977            let tail_start = font_bytes.len().saturating_sub(4096);
978            font_bytes[tail_start..].hash(&mut hasher);
979            font_index.hash(&mut hasher);
980            let hash = hasher.finish();
981
982            let mut font = ParsedFont {
983                hash,
984                font_metrics,
985                pdf_font_metrics,
986                num_glyphs,
987                hhea_table,
988                hmtx_range,
989                vmtx_range,
990                vhea_table,
991                maxp_table,
992                gsub_bytes,
993                gsub_cache_lazy: std::sync::OnceLock::new(),
994                gpos_bytes,
995                gpos_cache_lazy: std::sync::OnceLock::new(),
996                opt_gdef_table,
997                opt_kern_table,
998                cmap_subtable,
999                last_used: Arc::new(std::sync::atomic::AtomicU64::new(0)),
1000                is_variable_font: has_gvar,
1001                glyph_cache: Arc::new(std::sync::RwLock::new(BTreeMap::new())),
1002                // Eager path: `from_bytes` loaded LocaGlyf immediately
1003                // (or set None if the font has no loca+glyf). Lazy
1004                // callers use `from_bytes_shared` which replaces this
1005                // with `LocaGlyfState::Deferred` before returning.
1006                loca_glyf: LocaGlyfState::Loaded(loca_glyf_opt),
1007                space_width: None,
1008                mock: None,
1009                reverse_glyph_cache: BTreeMap::new(),
1010                // Don't retain the source bytes by default — layout and
1011                // raster don't need them. PDF subsetting / `to_bytes`
1012                // callers opt in via `with_source_bytes`.
1013                original_bytes: None,
1014                original_index: font_index,
1015                index_to_cid: BTreeMap::new(), // Will be filled for CFF fonts
1016                font_type: FontType::TrueType, // Default, will be updated if CFF
1017                font_name,
1018                hint_instance,
1019            };
1020
1021            // Calculate space width
1022            let space_width = font.get_space_width_internal();
1023
1024            // Pre-decode the space glyph straight into the lazy
1025            // `glyph_cache`. Space typically has no outline, so the
1026            // decoder's outline visitor returns nothing useful and
1027            // we'd spin re-decoding it every shape — short-circuit
1028            // here with a hand-rolled record carrying the hmtx
1029            // advance.
1030            let _ = (|| {
1031                let space_gid = font.lookup_glyph_index(' ' as u32)?;
1032                if let Ok(cache) = font.glyph_cache.read() {
1033                    if cache.contains_key(&space_gid) {
1034                        return None;
1035                    }
1036                }
1037                let space_width_val = space_width?;
1038                let space_record = OwnedGlyph {
1039                    bounding_box: OwnedGlyphBoundingBox {
1040                        max_x: 0,
1041                        max_y: 0,
1042                        min_x: 0,
1043                        min_y: 0,
1044                    },
1045                    horz_advance: space_width_val as u16,
1046                    outline: Vec::new(),
1047                    phantom_points: None,
1048                    raw_points: None,
1049                    raw_on_curve: None,
1050                    raw_contour_ends: None,
1051                    instructions: None,
1052                };
1053                if let Ok(mut cache) = font.glyph_cache.write() {
1054                    cache.insert(space_gid, Arc::new(space_record));
1055                }
1056                Some(())
1057            })();
1058
1059            font.space_width = space_width;
1060
1061            Some(font)
1062        }
1063
1064        /// Attach the source font bytes to this `ParsedFont`, enabling
1065        /// [`ParsedFont::to_bytes`] and [`ParsedFont::subset`] (both of
1066        /// which the layout / shaping path never calls).
1067        ///
1068        /// Takes an `Arc<FontBytes>` so the same file's bytes can be
1069        /// shared across every face of a `.ttc` at zero extra cost —
1070        /// pair with [`rust_fontconfig::FcFontCache::get_font_bytes`].
1071        /// For ad-hoc PDF callers that have raw heap bytes, wrap them
1072        /// via `Arc::new(FontBytes::Owned(Arc::from(vec)))`.
1073        pub fn with_source_bytes(mut self, bytes: std::sync::Arc<rust_fontconfig::FontBytes>) -> Self {
1074            self.original_bytes = Some(bytes);
1075            self
1076        }
1077
1078        /// Lazy-friendly constructor — identical to
1079        /// [`ParsedFont::from_bytes`] except that `LocaGlyf` is
1080        /// **not** loaded during the call. Instead, the supplied
1081        /// `Arc<[u8]>` is retained and `LocaGlyf::load` runs the first
1082        /// time [`get_or_decode_glyph`] needs glyph outlines for this
1083        /// face.
1084        ///
1085        /// Fonts that get resolved into a CSS fallback chain but are
1086        /// never actually rasterized (common on desktop — e.g. every
1087        /// face of HelveticaNeue.ttc loads, but only one or two are
1088        /// shaped) then pay zero loca/glyf cost.
1089        ///
1090        /// Production callers (the reftest harness, `LayoutWindow`,
1091        /// `cpurender`) should prefer this constructor. Tests that
1092        /// inspect `glyph_records_decoded` directly and don't want
1093        /// a lazy path keep using `from_bytes`.
1094        pub fn from_bytes_shared(
1095            bytes: std::sync::Arc<rust_fontconfig::FontBytes>,
1096            font_index: usize,
1097            warnings: &mut Vec<FontParseWarning>,
1098        ) -> Option<Self> {
1099            // Skip the eager LocaGlyf::load via `defer_loca_glyf=true`
1100            // — saves the load-then-drop cycle the prior arrangement
1101            // paid (when this called `from_bytes`, allocated
1102            // ~hundreds of KiB of loca+glyf bytes, then immediately
1103            // replaced the slot with `Deferred` and dropped them).
1104            // `bytes.as_ref()` derefs FontBytes → &[u8] (mmap or owned
1105            // — same code path).
1106            let mut font = Self::from_bytes_internal(bytes.as_ref(), font_index, warnings, true)?;
1107            font.original_bytes = Some(bytes.clone());
1108            font.loca_glyf = LocaGlyfState::Deferred {
1109                bytes,
1110                font_index,
1111                loaded: Arc::new(std::sync::Mutex::new(None)),
1112            };
1113            Some(font)
1114        }
1115
1116        /// Resolve the current face's `LocaGlyf`, loading it lazily
1117        /// on first call when `loca_glyf` is `Deferred`. Returns
1118        /// `None` when the font has no usable loca+glyf (CFF fonts
1119        /// or parse failures).
1120        fn resolve_loca_glyf(&self) -> Option<Arc<std::sync::Mutex<LocaGlyf>>> {
1121            match &self.loca_glyf {
1122                LocaGlyfState::Loaded(inner) => inner.clone(),
1123                LocaGlyfState::Deferred { bytes, font_index, loaded } => {
1124                    // Fast path: cached LocaGlyf is present.
1125                    if let Ok(guard) = loaded.lock() {
1126                        if let Some(arc) = guard.as_ref() {
1127                            return Some(Arc::clone(arc));
1128                        }
1129                    }
1130                    let _p = crate::probe::Probe::span("resolve_loca_glyf");
1131
1132                    // Slow path: parse provider + load LocaGlyf without
1133                    // holding the slot's lock (allsorts can take a
1134                    // millisecond or two on a fresh load). Re-check
1135                    // after acquiring the write lock so a parallel
1136                    // decoder doesn't double-load.
1137                    use allsorts::{
1138                        binary::read::ReadScope,
1139                        font_data::FontData,
1140                        tables::FontTableProvider,
1141                    };
1142                    let scope = ReadScope::new(bytes.as_slice());
1143                    let font_data = scope.read::<FontData<'_>>().ok()?;
1144                    let provider = font_data.table_provider(*font_index).ok()?;
1145                    // Gate on table presence to match the `from_bytes`
1146                    // has_glyf check; avoids a spurious warning on
1147                    // CFF fonts that sneak into the Deferred path.
1148                    if !provider.has_table(tag::GLYF) || !provider.has_table(tag::LOCA) {
1149                        return None;
1150                    }
1151                    let new_arc = LocaGlyf::load(&provider)
1152                        .ok()
1153                        .map(|lg| Arc::new(std::sync::Mutex::new(lg)))?;
1154
1155                    if let Ok(mut guard) = loaded.lock() {
1156                        if let Some(existing) = guard.as_ref() {
1157                            return Some(Arc::clone(existing));
1158                        }
1159                        *guard = Some(Arc::clone(&new_arc));
1160                    }
1161                    Some(new_arc)
1162                }
1163            }
1164        }
1165
1166        /// Source bytes for PDF subsetting / table extraction.
1167        ///
1168        /// Looks in two places:
1169        /// - `original_bytes` (set by [`ParsedFont::with_source_bytes`]
1170        ///   for legacy PDF-first construction).
1171        /// - `LocaGlyfState::Deferred.bytes` (set by
1172        ///   [`ParsedFont::from_bytes_shared`] — the production lazy
1173        ///   path, which already retains an `Arc<[u8]>` for the lazy
1174        ///   loca/glyf loader).
1175        ///
1176        /// Returns `None` only for `ParsedFont`s built via the eager
1177        /// `from_bytes` path without an explicit `with_source_bytes`
1178        /// call — i.e. unit tests that load a font and don't touch
1179        /// PDF.
1180        pub fn source_bytes_for_subset(&self) -> Option<std::sync::Arc<rust_fontconfig::FontBytes>> {
1181            if let Some(bytes) = &self.original_bytes {
1182                return Some(std::sync::Arc::clone(bytes));
1183            }
1184            if let LocaGlyfState::Deferred { bytes, .. } = &self.loca_glyf {
1185                return Some(std::sync::Arc::clone(bytes));
1186            }
1187            None
1188        }
1189
1190        /// Read the monotonic-clock nanos timestamp of the most
1191        /// recent [`get_or_decode_glyph`] call on this face, or `0`
1192        /// if it's never been touched.
1193        pub fn last_used_nanos(&self) -> u64 {
1194            self.last_used.load(std::sync::atomic::Ordering::Relaxed)
1195        }
1196
1197        /// Drop the cached `LocaGlyf` for this face if it's
1198        /// `Deferred`-with-bytes-retained — so the next
1199        /// [`get_or_decode_glyph`] re-parses from `bytes`. No-op for
1200        /// `Loaded` faces (no source bytes to fall back to).
1201        ///
1202        /// Used by [`crate::text3::cache::FontManager::evict_unused`]
1203        /// and exposed publicly so embedders can free memory under
1204        /// pressure on fonts they no longer need to render.
1205        pub fn evict_loca_glyf(&self) -> bool {
1206            match &self.loca_glyf {
1207                LocaGlyfState::Deferred { loaded, .. } => {
1208                    if let Ok(mut guard) = loaded.lock() {
1209                        if guard.is_some() {
1210                            *guard = None;
1211                            return true;
1212                        }
1213                    }
1214                    false
1215                }
1216                LocaGlyfState::Loaded(_) => false,
1217            }
1218        }
1219
1220        /// Fetch the parsed GSUB cache if this font has one, parsing
1221        /// it from the retained `gsub_bytes` on first access.
1222        ///
1223        /// Moved out of the eager `from_bytes` path because most text
1224        /// runs never trigger GSUB — plain ASCII without ligatures is
1225        /// handled entirely by the cmap + hmtx fast path. Building
1226        /// `LayoutCacheData<GSUB>` up front reserved ~0.5–2 MiB per
1227        /// face just to throw it away on pages that don't shape
1228        /// complex scripts.
1229        pub fn gsub(&self) -> Option<&GsubCache> {
1230            self.gsub_cache_lazy
1231                .get_or_init(|| {
1232                    use allsorts::{
1233                        binary::read::ReadScope,
1234                        layout::{new_layout_cache, LayoutTable, GSUB},
1235                    };
1236                    let bytes = self.gsub_bytes.as_ref()?;
1237                    ReadScope::new(bytes)
1238                        .read::<LayoutTable<GSUB>>()
1239                        .ok()
1240                        .map(new_layout_cache)
1241                })
1242                .as_ref()
1243        }
1244
1245        /// Fetch the parsed GPOS cache if this font has one, parsing
1246        /// it from the retained `gpos_bytes` on first access. See
1247        /// [`ParsedFont::gsub`] for the motivation.
1248        pub fn gpos(&self) -> Option<&GposCache> {
1249            self.gpos_cache_lazy
1250                .get_or_init(|| {
1251                    use allsorts::{
1252                        binary::read::ReadScope,
1253                        layout::{new_layout_cache, LayoutTable, GPOS},
1254                    };
1255                    let bytes = self.gpos_bytes.as_ref()?;
1256                    ReadScope::new(bytes)
1257                        .read::<LayoutTable<GPOS>>()
1258                        .ok()
1259                        .map(new_layout_cache)
1260                })
1261                .as_ref()
1262        }
1263
1264        /// Fetch an `OwnedGlyph` for `gid`, decoding it on first access.
1265        ///
1266        /// Cached in the `Arc<RwLock<…>>` `glyph_cache` so subsequent
1267        /// calls (including across clones of this `ParsedFont`) hit the
1268        /// cache. Returns `None` when `gid >= num_glyphs` or the font
1269        /// has no loca+glyf and no hmtx entry for the glyph. For CFF
1270        /// fonts the returned record has an empty outline and an advance
1271        /// pulled from hmtx — matching the pre-lazy behaviour.
1272        ///
1273        /// Called on the rasterizer hot path; performance budget is a
1274        /// few µs per unique glyph (first hit) and an Arc bump + BTreeMap
1275        /// lookup (cache hits). The write lock is held only across the
1276        /// decode, not across the caller's use of the returned Arc.
1277        pub fn get_or_decode_glyph(&self, gid: u16) -> Option<std::sync::Arc<OwnedGlyph>> {
1278            use std::sync::Arc;
1279            if usize::from(gid) >= self.num_glyphs.min(u16::MAX) as usize {
1280                return None;
1281            }
1282            // Bump the LRU timestamp so `FontManager::evict_unused`
1283            // can tell this face is still in use. Cheap atomic store
1284            // (Relaxed — eviction reads the same atomic and tolerates
1285            // a slightly stale value, which only causes "evict, then
1286            // re-load on next access" — never an incorrect render).
1287            self.last_used
1288                .store(monotonic_now_nanos(), std::sync::atomic::Ordering::Relaxed);
1289
1290            // Fast path: cache hit.
1291            if let Ok(cache) = self.glyph_cache.read() {
1292                if let Some(existing) = cache.get(&gid) {
1293                    return Some(Arc::clone(existing));
1294                }
1295            }
1296
1297            // Miss: decode. We drop the read lock before taking the
1298            // write lock to avoid deadlock, and we re-check on the way
1299            // in because another thread may have decoded the same glyph
1300            // in between.
1301            let record = self.decode_glyph_inner(gid);
1302            let arc = Arc::new(record);
1303            if let Ok(mut cache) = self.glyph_cache.write() {
1304                cache
1305                    .entry(gid)
1306                    .or_insert_with(|| Arc::clone(&arc));
1307                // If another thread beat us to the insert, return theirs
1308                // so all callers observe the same Arc.
1309                if let Some(winner) = cache.get(&gid) {
1310                    return Some(Arc::clone(winner));
1311                }
1312            }
1313            Some(arc)
1314        }
1315
1316        /// Eagerly decode every glyph into the lazy `glyph_cache`,
1317        /// restoring the pre-lazy "every glyph is materialised at
1318        /// construction time" behaviour. Used by tests that iterate
1319        /// or compare against reference tooling, and by embedders
1320        /// that want a walkable view without driving every shape
1321        /// through `get_or_decode_glyph`.
1322        ///
1323        /// After `prime_glyph_cache`, callers can use
1324        /// [`ParsedFont::for_each_decoded_glyph`] or
1325        /// [`ParsedFont::glyph_cache_snapshot`] to observe the
1326        /// populated cache.
1327        pub fn prime_glyph_cache(&mut self) {
1328            let n = self.num_glyphs.min(u16::MAX) as usize;
1329            for glyph_index in 0..n {
1330                let gid = glyph_index as u16;
1331                let _ = self.get_or_decode_glyph(gid);
1332            }
1333        }
1334
1335        /// Walk every entry currently in the lazy `glyph_cache`,
1336        /// invoking `f(gid, &OwnedGlyph)` for each. Holds a read
1337        /// lock for the duration; do not call back into the font
1338        /// from `f`. The cache is populated on demand by
1339        /// [`ParsedFont::get_or_decode_glyph`] (and bulk-prefilled
1340        /// by [`ParsedFont::prime_glyph_cache`]).
1341        pub fn for_each_decoded_glyph<F: FnMut(u16, &OwnedGlyph)>(&self, mut f: F) {
1342            if let Ok(cache) = self.glyph_cache.read() {
1343                for (gid, glyph) in cache.iter() {
1344                    f(*gid, glyph.as_ref());
1345                }
1346            }
1347        }
1348
1349        /// Snapshot of the currently-decoded glyphs as a
1350        /// `BTreeMap<u16, Arc<OwnedGlyph>>`. Cheap (clones the
1351        /// Arcs, not the records). Used by callers that want to
1352        /// hand the map off across an API boundary; for in-place
1353        /// iteration prefer [`ParsedFont::for_each_decoded_glyph`].
1354        pub fn glyph_cache_snapshot(&self) -> BTreeMap<u16, Arc<OwnedGlyph>> {
1355            self.glyph_cache
1356                .read()
1357                .map(|c| c.clone())
1358                .unwrap_or_default()
1359        }
1360
1361        /// Core decode routine: produces one `OwnedGlyph` for `gid` by
1362        /// locking `loca_glyf` and running allsorts' outline visitor +
1363        /// raw-simple-glyph extraction. Factored out so both
1364        /// [`get_or_decode_glyph`] and [`prime_glyph_cache`] share it.
1365        ///
1366        /// Always returns an `OwnedGlyph` — if anything in the decode
1367        /// chain fails, falls back to an empty-outline record with the
1368        /// `hmtx` advance. This mirrors the pre-lazy behaviour where
1369        /// every gid ended up in `glyph_records_decoded`.
1370        fn hmtx_bytes(&self) -> &[u8] {
1371            let (off, len) = self.hmtx_range;
1372            if len == 0 { return &[]; }
1373            self.original_bytes.as_ref()
1374                .map(|b| &b.as_ref()[off..off+len])
1375                .unwrap_or(&[])
1376        }
1377
1378        fn vmtx_bytes(&self) -> &[u8] {
1379            let (off, len) = self.vmtx_range;
1380            if len == 0 { return &[]; }
1381            self.original_bytes.as_ref()
1382                .map(|b| &b.as_ref()[off..off+len])
1383                .unwrap_or(&[])
1384        }
1385
1386        fn decode_glyph_inner(&self, gid: u16) -> OwnedGlyph {
1387            let _p = crate::probe::Probe::span("decode_glyph");
1388            let horz_advance = allsorts::glyph_info::advance(
1389                &self.maxp_table,
1390                &self.hhea_table,
1391                self.hmtx_bytes(),
1392                gid,
1393            )
1394            .unwrap_or_default();
1395
1396            let mut record = OwnedGlyph {
1397                horz_advance,
1398                bounding_box: OwnedGlyphBoundingBox {
1399                    min_x: 0,
1400                    min_y: 0,
1401                    max_x: horz_advance as i16,
1402                    max_y: 0,
1403                },
1404                outline: Vec::new(),
1405                phantom_points: None,
1406                raw_points: None,
1407                raw_on_curve: None,
1408                raw_contour_ends: None,
1409                instructions: None,
1410            };
1411
1412            // Resolve the `LocaGlyf` for this face. For `Loaded` that's
1413            // a cheap `Arc::clone`; for `Deferred` this is where the
1414            // actual `LocaGlyf::load` happens on first access, paid once
1415            // per face that ever decodes a glyph.
1416            let Some(loca_glyf_arc) = self.resolve_loca_glyf() else {
1417                return record;
1418            };
1419            let Ok(mut loca_glyf) = loca_glyf_arc.lock() else {
1420                return record;
1421            };
1422
1423            // Visit the outline. If this is a variable font (gvar
1424            // table present) AND we still have source bytes (only
1425            // the `LocaGlyfState::Deferred` path retains them), we
1426            // re-derive a `VariableGlyfContext` here so default-
1427            // instance vs designed-instance differences land in
1428            // the decoded outline. The chained `if let` pattern
1429            // keeps `provider` and `store` in scope for the
1430            // visit, which the borrow checker requires (the
1431            // store's `Cow::Borrowed(&[u8])` tables tie its
1432            // lifetime to the provider).
1433            //
1434            // Eager-`from_bytes` faces (no retained bytes) and
1435            // non-variable fonts skip the var-context machinery
1436            // and decode the default instance — same behaviour as
1437            // before R4.
1438            let mut outline_done = false;
1439            if self.is_variable_font {
1440                if let LocaGlyfState::Deferred { bytes, .. } = &self.loca_glyf {
1441                    let scope = allsorts::binary::read::ReadScope::new(bytes);
1442                    if let Ok(font_data) =
1443                        scope.read::<allsorts::font_data::FontData<'_>>()
1444                    {
1445                        if let Ok(provider) = font_data.table_provider(self.original_index) {
1446                            if let Ok(store) = VariableGlyfContextStore::read(&provider) {
1447                                if let Ok(var_ctx) = VariableGlyfContext::new(&store) {
1448                                    let mut visitor = GlyfVisitorContext::new(
1449                                        &mut *loca_glyf,
1450                                        Some(var_ctx),
1451                                    );
1452                                    let mut collector = GlyphOutlineCollector::new();
1453                                    if visitor.visit(gid, None, &mut collector).is_ok() {
1454                                        record.outline = collector.into_outlines();
1455                                        let (min_x, min_y, max_x, max_y) =
1456                                            compute_outline_bbox(&record.outline);
1457                                        record.bounding_box = OwnedGlyphBoundingBox {
1458                                            min_x,
1459                                            min_y,
1460                                            max_x,
1461                                            max_y,
1462                                        };
1463                                        outline_done = true;
1464                                    }
1465                                }
1466                            }
1467                        }
1468                    }
1469                }
1470            }
1471            if !outline_done {
1472                let mut visitor =
1473                    GlyfVisitorContext::new(&mut *loca_glyf, None);
1474                let mut collector = GlyphOutlineCollector::new();
1475                if visitor.visit(gid, None, &mut collector).is_ok() {
1476                    record.outline = collector.into_outlines();
1477                    let (min_x, min_y, max_x, max_y) =
1478                        compute_outline_bbox(&record.outline);
1479                    record.bounding_box = OwnedGlyphBoundingBox {
1480                        min_x,
1481                        min_y,
1482                        max_x,
1483                        max_y,
1484                    };
1485                }
1486            }
1487
1488            // Second pass: pull raw SimpleGlyph data for TrueType
1489            // bytecode hinting. LocaGlyf caches the `Arc<Glyph>`
1490            // internally so this lookup is cheap after the first call.
1491            if let Ok(glyph_arc) = loca_glyf.glyph(gid) {
1492                if let allsorts::tables::glyf::Glyph::Simple(sg) = glyph_arc.as_ref() {
1493                    record.raw_points = Some(
1494                        sg.coordinates.iter().map(|(_, pt)| (pt.0, pt.1)).collect(),
1495                    );
1496                    record.raw_on_curve = Some(
1497                        sg.coordinates.iter().map(|(f, _)| f.is_on_curve()).collect(),
1498                    );
1499                    record.raw_contour_ends = Some(sg.end_pts_of_contours.clone());
1500                    record.instructions = Some(sg.instructions.to_vec());
1501                }
1502            }
1503
1504            record
1505        }
1506
1507        /// Parse PDF-specific font metrics from HEAD, HHEA, and OS/2 tables
1508        fn parse_pdf_font_metrics(
1509            font_bytes: &[u8],
1510            font_index: usize,
1511            head_table: &allsorts::tables::HeadTable,
1512            hhea_table: &allsorts::tables::HheaTable,
1513        ) -> PdfFontMetrics {
1514            use allsorts::{
1515                binary::read::ReadScope,
1516                font_data::FontData,
1517                tables::{os2::Os2, FontTableProvider},
1518                tag,
1519            };
1520
1521            let scope = ReadScope::new(font_bytes);
1522            let font_file = scope.read::<FontData<'_>>().ok();
1523            let provider = font_file
1524                .as_ref()
1525                .and_then(|ff| ff.table_provider(font_index).ok());
1526
1527            let os2_table = provider
1528                .as_ref()
1529                .and_then(|p| p.table_data(tag::OS_2).ok())
1530                .and_then(|os2_data| {
1531                    let data = os2_data?;
1532                    let scope = ReadScope::new(&data);
1533                    scope.read_dep::<Os2>(data.len()).ok()
1534                });
1535
1536            // Base metrics from HEAD and HHEA (always present)
1537            let base = PdfFontMetrics {
1538                units_per_em: head_table.units_per_em,
1539                font_flags: head_table.flags,
1540                x_min: head_table.x_min,
1541                y_min: head_table.y_min,
1542                x_max: head_table.x_max,
1543                y_max: head_table.y_max,
1544                ascender: hhea_table.ascender,
1545                descender: hhea_table.descender,
1546                line_gap: hhea_table.line_gap,
1547                advance_width_max: hhea_table.advance_width_max,
1548                caret_slope_rise: hhea_table.caret_slope_rise,
1549                caret_slope_run: hhea_table.caret_slope_run,
1550                ..PdfFontMetrics::zero()
1551            };
1552
1553            // Add OS/2 metrics if available
1554            os2_table
1555                .map(|os2| PdfFontMetrics {
1556                    x_avg_char_width: os2.x_avg_char_width,
1557                    us_weight_class: os2.us_weight_class,
1558                    us_width_class: os2.us_width_class,
1559                    y_strikeout_size: os2.y_strikeout_size,
1560                    y_strikeout_position: os2.y_strikeout_position,
1561                    ..base
1562                })
1563                .unwrap_or(base)
1564        }
1565
1566        /// Returns the width of the space character in font units.
1567        ///
1568        /// This is used internally for text layout calculations.
1569        /// Returns `None` if the font has no space glyph or its width cannot be determined.
1570        fn get_space_width_internal(&self) -> Option<usize> {
1571            if let Some(mock) = self.mock.as_ref() {
1572                return mock.space_width;
1573            }
1574            let glyph_index = self.lookup_glyph_index(' ' as u32)?;
1575
1576            allsorts::glyph_info::advance(
1577                &self.maxp_table,
1578                &self.hhea_table,
1579                self.hmtx_bytes(),
1580                glyph_index,
1581            )
1582            .ok()
1583            .map(|s| s as usize)
1584        }
1585
1586        /// Look up the glyph index for a Unicode codepoint
1587        pub fn lookup_glyph_index(&self, codepoint: u32) -> Option<u16> {
1588            let cmap = self.cmap_subtable.as_ref()?;
1589            cmap.map_glyph(codepoint).ok().flatten()
1590        }
1591
1592        /// Get the horizontal advance width for a glyph in font units.
1593        ///
1594        /// Pulled straight from the `hmtx` table — no glyph-outline
1595        /// decode. Called once per shaped glyph per layout pass, so
1596        /// avoiding the lazy decode here is a meaningful win over
1597        /// routing through `get_or_decode_glyph`.
1598        pub fn get_horizontal_advance(&self, glyph_index: u16) -> u16 {
1599            if let Some(mock) = self.mock.as_ref() {
1600                return mock.glyph_advances.get(&glyph_index).copied().unwrap_or(0);
1601            }
1602            allsorts::glyph_info::advance(
1603                &self.maxp_table,
1604                &self.hhea_table,
1605                self.hmtx_bytes(),
1606                glyph_index,
1607            )
1608            .unwrap_or_default()
1609        }
1610
1611        /// Get the hinted advance width in pixels for a glyph at the given ppem.
1612        ///
1613        /// For glyphs with outlines, runs TrueType bytecode hinting to get the
1614        /// grid-fitted advance from phantom points. For glyphs without outlines
1615        /// (e.g. space), rounds the scaled advance to the pixel grid, matching
1616        /// FreeType's behavior.
1617        ///
1618        /// Returns `None` if hinting is not available or fails.
1619        pub fn get_hinted_advance_px(&self, glyph_index: u16, ppem: u16) -> Option<f32> {
1620            let glyph = self.get_or_decode_glyph(glyph_index)?;
1621
1622            let upem = self.font_metrics.units_per_em;
1623            if upem == 0 || ppem == 0 {
1624                return None;
1625            }
1626
1627            // Check if we even have a hint instance
1628            let _hint_mutex = self.hint_instance.as_ref()?;
1629
1630            use allsorts::hinting::f26dot6::{compute_scale, F26Dot6};
1631            let scale = compute_scale(ppem, upem);
1632            let adv_f26dot6 = F26Dot6::from_funits(glyph.horz_advance as i32, scale);
1633
1634            // For glyphs with outline data, run bytecode hinting
1635            if let (Some(raw_points), Some(raw_on_curve), Some(raw_contour_ends)) = (
1636                glyph.raw_points.as_ref(),
1637                glyph.raw_on_curve.as_ref(),
1638                glyph.raw_contour_ends.as_ref(),
1639            ) {
1640                let instructions = glyph.instructions.as_deref().unwrap_or(&[]);
1641                let mut hint = _hint_mutex.lock().ok()?;
1642                hint.set_ppem(ppem, ppem as f64).ok()?;
1643
1644                let points_f26dot6: Vec<(i32, i32)> = raw_points
1645                    .iter()
1646                    .map(|&(x, y)| {
1647                        let sx = F26Dot6::from_funits(x as i32, scale);
1648                        let sy = F26Dot6::from_funits(y as i32, scale);
1649                        (sx.to_bits(), sy.to_bits())
1650                    })
1651                    .collect();
1652
1653                // Use the scaled advance rounded to pixel grid, NOT the hinted
1654                // phantom point.  Some glyph programs apply ClearType-specific
1655                // SHPIX adjustments to the advance phantom point that are wrong
1656                // for non-ClearType rendering.  The rounded scaled advance matches
1657                // FreeType's DEFAULT mode advance output.
1658                let rounded = (adv_f26dot6.to_bits() + 32) & !63;
1659                Some(rounded as f32 / 64.0)
1660            } else {
1661                // No outline (e.g. space): use scaled advance, rounded to grid
1662                // (matching FreeType's phantom point pre-rounding)
1663                let rounded = (adv_f26dot6.to_bits() + 32) & !63;
1664                Some(rounded as f32 / 64.0)
1665            }
1666        }
1667
        /// Get the number of glyphs in this font.
        ///
        /// Returns the stored `num_glyphs` field unchanged.
        pub fn num_glyphs(&self) -> u16 {
            self.num_glyphs
        }
1672
        /// Check if this font has a glyph for the given codepoint.
        ///
        /// True exactly when [`ParsedFont::lookup_glyph_index`] returns
        /// `Some` for `codepoint`.
        pub fn has_glyph(&self, codepoint: u32) -> bool {
            self.lookup_glyph_index(codepoint).is_some()
        }
1677
1678        /// Get vertical metrics for a glyph (for vertical text layout).
1679        ///
1680        /// Uses vhea+vmtx tables (same binary format as hhea+hmtx).
1681        /// Returns None if font has no vertical metrics tables.
1682        pub fn get_vertical_metrics(
1683            &self,
1684            glyph_id: u16,
1685        ) -> Option<crate::text3::cache::VerticalMetrics> {
1686            let vhea = self.vhea_table.as_ref()?;
1687            if self.vmtx_range.1 == 0 {
1688                return None;
1689            }
1690            let vert_advance = allsorts::glyph_info::advance(
1691                &self.maxp_table, vhea, self.vmtx_bytes(), glyph_id,
1692            ).ok()? as f32;
1693
1694            let units_per_em = self.font_metrics.units_per_em as f32;
1695            let scale = if units_per_em > 0.0 { 1.0 / units_per_em } else { 0.001 };
1696
1697            // Vertical bearing: approximate from glyph bbox if available
1698            let (bearing_x, bearing_y) = self.get_or_decode_glyph(glyph_id)
1699                .map(|g| {
1700                    let bbox = &g.bounding_box;
1701                    // tsb (top side bearing): origin_y - max_y
1702                    // lsb for vertical: center the glyph horizontally
1703                    let width = (bbox.max_x - bbox.min_x) as f32;
1704                    (-(width / 2.0) * scale, (vert_advance * scale) - (bbox.max_y as f32 * scale))
1705                })
1706                .unwrap_or((0.0, 0.0));
1707
1708            Some(crate::text3::cache::VerticalMetrics {
1709                advance: vert_advance * scale,
1710                bearing_x,
1711                bearing_y,
1712                origin_y: self.font_metrics.ascent * scale,
1713            })
1714        }
1715
1716        /// Get layout-specific font metrics
1717        pub fn get_font_metrics(&self) -> crate::text3::cache::LayoutFontMetrics {
1718            // Ensure descent is positive (OpenType may have negative descent)
1719            let descent = if self.font_metrics.descent > 0.0 {
1720                self.font_metrics.descent
1721            } else {
1722                -self.font_metrics.descent
1723            };
1724
1725            crate::text3::cache::LayoutFontMetrics {
1726                ascent: self.font_metrics.ascent,
1727                descent,
1728                line_gap: self.font_metrics.line_gap,
1729                units_per_em: self.font_metrics.units_per_em,
1730                x_height: self.font_metrics.x_height,
1731                cap_height: self.font_metrics.cap_height,
1732            }
1733        }
1734
1735        /// Convert the ParsedFont back to bytes using allsorts::whole_font
1736        /// This reconstructs the entire font from the parsed data
1737        ///
1738        /// Source bytes come from either the explicit
1739        /// [`ParsedFont::with_source_bytes`] handle (PDF-first
1740        /// construction) *or* the `LocaGlyfState::Deferred` slot
1741        /// installed by [`ParsedFont::from_bytes_shared`]. The
1742        /// production lazy path retains bytes for the lazy LocaGlyf
1743        /// loader, so PDF subsetting Just Works without an extra
1744        /// `with_source_bytes` call.
1745        ///
1746        /// # Arguments
1747        /// * `tags` - Optional list of specific table tags to include (None = all tables)
1748        pub fn to_bytes(&self, tags: Option<&[u32]>) -> Result<Vec<u8>, String> {
1749            let source = self.source_bytes_for_subset().ok_or_else(|| {
1750                "ParsedFont::to_bytes requires source bytes; construct via \
1751                 ParsedFont::from_bytes_shared (production lazy path) or \
1752                 attach via ParsedFont::with_source_bytes"
1753                    .to_string()
1754            })?;
1755            let scope = ReadScope::new(source.as_slice());
1756            let font_file = scope.read::<FontData<'_>>().map_err(|e| e.to_string())?;
1757            let provider = font_file
1758                .table_provider(self.original_index)
1759                .map_err(|e| e.to_string())?;
1760
1761            let tags_to_use = tags.unwrap_or(&[
1762                tag::CMAP,
1763                tag::HEAD,
1764                tag::HHEA,
1765                tag::HMTX,
1766                tag::MAXP,
1767                tag::NAME,
1768                tag::OS_2,
1769                tag::POST,
1770                tag::GLYF,
1771                tag::LOCA,
1772            ]);
1773
1774            whole_font(&provider, tags_to_use).map_err(|e| e.to_string())
1775        }
1776
1777        /// Create a subset font containing only the specified glyph IDs
1778        /// Returns the subset font bytes and a mapping from old to new glyph IDs
1779        ///
1780        /// # Arguments
1781        /// * `glyph_ids` - The glyph IDs to include in the subset (glyph 0/.notdef is always
1782        ///   included)
1783        /// * `cmap_target` - Target cmap format (Unicode for web, MacRoman for compatibility)
1784        ///
1785        /// # Returns
1786        /// A tuple of (subset_font_bytes, glyph_mapping) where glyph_mapping maps
1787        /// original_glyph_id -> (new_glyph_id, original_char)
1788        pub fn subset(
1789            &self,
1790            glyph_ids: &[(u16, char)],
1791            cmap_target: CmapTarget,
1792        ) -> Result<(Vec<u8>, BTreeMap<u16, (u16, char)>), String> {
1793            let source = self.source_bytes_for_subset().ok_or_else(|| {
1794                "ParsedFont::subset requires source bytes; construct via \
1795                 ParsedFont::from_bytes_shared (production lazy path) or \
1796                 attach via ParsedFont::with_source_bytes"
1797                    .to_string()
1798            })?;
1799            let scope = ReadScope::new(source.as_slice());
1800            let font_file = scope.read::<FontData<'_>>().map_err(|e| e.to_string())?;
1801            let provider = font_file
1802                .table_provider(self.original_index)
1803                .map_err(|e| e.to_string())?;
1804
1805            // Build glyph mapping: original_id -> (new_id, char)
1806            let glyph_mapping: BTreeMap<u16, (u16, char)> = glyph_ids
1807                .iter()
1808                .enumerate()
1809                .map(|(new_id, &(original_id, ch))| (original_id, (new_id as u16, ch)))
1810                .collect();
1811
1812            // Extract just the glyph IDs for subsetting
1813            let ids: Vec<u16> = glyph_ids.iter().map(|(id, _)| *id).collect();
1814
1815            // Use PDF profile for embedding fonts in PDFs
1816            let font_bytes = allsorts_subset(&provider, &ids, &SubsetProfile::Pdf, cmap_target)
1817                .map_err(|e| format!("Subset error: {:?}", e))?;
1818
1819            Ok((font_bytes, glyph_mapping))
1820        }
1821
1822        /// Get the width of a glyph in font units (internal, unscaled)
1823        pub fn get_glyph_width_internal(&self, glyph_index: u16) -> Option<usize> {
1824            allsorts::glyph_info::advance(
1825                &self.maxp_table,
1826                &self.hhea_table,
1827                self.hmtx_bytes(),
1828                glyph_index,
1829            )
1830            .ok()
1831            .map(|s| s as usize)
1832        }
1833
        /// Get the width of the space character (unscaled font units).
        ///
        /// Returns the stored `space_width` field; no table lookup is
        /// performed here.
        #[inline]
        pub const fn get_space_width(&self) -> Option<usize> {
            self.space_width
        }
1839
1840        /// Add glyph-to-text mapping to reverse cache
1841        /// This should be called during text shaping when we know both the source text and
1842        /// resulting glyphs
1843        pub fn cache_glyph_mapping(&mut self, glyph_id: u16, cluster_text: &str) {
1844            self.reverse_glyph_cache
1845                .insert(glyph_id, cluster_text.to_string());
1846        }
1847
1848        /// Get the cluster text that produced a specific glyph ID
1849        /// Returns the original text that was shaped into this glyph (handles ligatures correctly)
1850        pub fn get_glyph_cluster_text(&self, glyph_id: u16) -> Option<&str> {
1851            self.reverse_glyph_cache.get(&glyph_id).map(|s| s.as_str())
1852        }
1853
1854        /// Get the first character from the cluster text for a glyph ID
1855        /// This is useful for PDF ToUnicode CMap generation which requires single character
1856        /// mappings
1857        pub fn get_glyph_primary_char(&self, glyph_id: u16) -> Option<char> {
1858            self.reverse_glyph_cache
1859                .get(&glyph_id)
1860                .and_then(|text| text.chars().next())
1861        }
1862
        /// Clear the reverse glyph cache (useful for memory management).
        ///
        /// Despite the name, this only empties `reverse_glyph_cache` (the
        /// glyph-id -> cluster-text map); it does not touch any other
        /// field of the font.
        pub fn clear_glyph_cache(&mut self) {
            self.reverse_glyph_cache.clear();
        }
1867
1868        /// Get the bounding box size of a glyph (unscaled units) - for PDF
1869        /// Returns (width, height) in font units
1870        pub fn get_glyph_bbox_size(&self, glyph_index: u16) -> Option<(i32, i32)> {
1871            let g = self.get_or_decode_glyph(glyph_index)?;
1872            let glyph_width = g.horz_advance as i32;
1873            let glyph_height = g.bounding_box.max_y as i32 - g.bounding_box.min_y as i32;
1874            Some((glyph_width, glyph_height))
1875        }
1876    }
1877
1878    /// Compute the bounding box from collected glyph outlines.
1879    fn compute_outline_bbox(outlines: &[GlyphOutline]) -> (i16, i16, i16, i16) {
1880        let mut min_x = i16::MAX;
1881        let mut min_y = i16::MAX;
1882        let mut max_x = i16::MIN;
1883        let mut max_y = i16::MIN;
1884        let mut has_points = false;
1885
1886        for outline in outlines {
1887            for op in outline.operations.as_slice() {
1888                let points: &[(i16, i16)] = match op {
1889                    GlyphOutlineOperation::MoveTo(m) => &[(m.x, m.y)],
1890                    GlyphOutlineOperation::LineTo(l) => &[(l.x, l.y)],
1891                    GlyphOutlineOperation::QuadraticCurveTo(q) => {
1892                        // Check both control and end point for bbox
1893                        min_x = min_x.min(q.ctrl_1_x).min(q.end_x);
1894                        min_y = min_y.min(q.ctrl_1_y).min(q.end_y);
1895                        max_x = max_x.max(q.ctrl_1_x).max(q.end_x);
1896                        max_y = max_y.max(q.ctrl_1_y).max(q.end_y);
1897                        has_points = true;
1898                        continue;
1899                    }
1900                    GlyphOutlineOperation::CubicCurveTo(c) => {
1901                        min_x = min_x.min(c.ctrl_1_x).min(c.ctrl_2_x).min(c.end_x);
1902                        min_y = min_y.min(c.ctrl_1_y).min(c.ctrl_2_y).min(c.end_y);
1903                        max_x = max_x.max(c.ctrl_1_x).max(c.ctrl_2_x).max(c.end_x);
1904                        max_y = max_y.max(c.ctrl_1_y).max(c.ctrl_2_y).max(c.end_y);
1905                        has_points = true;
1906                        continue;
1907                    }
1908                    GlyphOutlineOperation::ClosePath => continue,
1909                };
1910                for &(x, y) in points {
1911                    min_x = min_x.min(x);
1912                    min_y = min_y.min(y);
1913                    max_x = max_x.max(x);
1914                    max_y = max_y.max(y);
1915                    has_points = true;
1916                }
1917            }
1918        }
1919
1920        if has_points {
1921            (min_x, min_y, max_x, max_y)
1922        } else {
1923            (0, 0, 0, 0)
1924        }
1925    }
1926
    /// One decoded glyph: advance, bounding box, outline and (for simple
    /// TrueType glyphs) the raw point data used for hinting.
    #[derive(Debug, Clone)]
    pub struct OwnedGlyph {
        /// Bounding box of the glyph in font units.
        pub bounding_box: OwnedGlyphBoundingBox,
        /// Horizontal advance in font units.
        pub horz_advance: u16,
        /// Decoded outline; empty when the glyph has no outline data.
        pub outline: Vec<GlyphOutline>,
        /// Phantom points, when available.
        // NOTE(review): `ParsedFont::decode_glyph_inner` always leaves this
        // as `None` — confirm whether any other producer fills it in.
        pub phantom_points: Option<[Point; 4]>,
        /// Raw TrueType points in font units (for hinting). None for composite/CFF glyphs.
        pub raw_points: Option<Vec<(i16, i16)>>,
        /// On-curve flags for each raw point.
        pub raw_on_curve: Option<Vec<bool>>,
        /// Contour end-point indices (TrueType).
        pub raw_contour_ends: Option<Vec<u16>>,
        /// Per-glyph TrueType hinting instructions.
        pub instructions: Option<Vec<u8>>,
    }
1942
1943    // --- ParsedFontTrait Implementation for ParsedFont ---
1944
1945    impl crate::text3::cache::ShallowClone for ParsedFont {
1946        fn shallow_clone(&self) -> Self {
1947            self.clone() // ParsedFont::clone uses Arc internally, so it's shallow
1948        }
1949    }
1950
1951    impl crate::text3::cache::ParsedFontTrait for ParsedFont {
1952        fn shape_text(
1953            &self,
1954            text: &str,
1955            script: crate::font_traits::Script,
1956            language: crate::font_traits::Language,
1957            direction: crate::font_traits::BidiDirection,
1958            style: &crate::font_traits::StyleProperties,
1959        ) -> Result<Vec<crate::font_traits::Glyph>, crate::font_traits::LayoutError> {
1960            // Call the existing shape_text_for_parsed_font method (defined in default.rs)
1961            crate::text3::default::shape_text_for_parsed_font(
1962                self, text, script, language, direction, style,
1963            )
1964        }
1965
1966        fn get_hash(&self) -> u64 {
1967            self.hash
1968        }
1969
1970        fn get_glyph_size(
1971            &self,
1972            glyph_id: u16,
1973            font_size_px: f32,
1974        ) -> Option<azul_core::geom::LogicalSize> {
1975            self.get_or_decode_glyph(glyph_id).map(|record| {
1976                let units_per_em = self.font_metrics.units_per_em as f32;
1977                let scale_factor = if units_per_em > 0.0 {
1978                    font_size_px / units_per_em
1979                } else {
1980                    0.01
1981                };
1982                let bbox = &record.bounding_box;
1983                azul_core::geom::LogicalSize {
1984                    width: (bbox.max_x - bbox.min_x) as f32 * scale_factor,
1985                    height: (bbox.max_y - bbox.min_y) as f32 * scale_factor,
1986                }
1987            })
1988        }
1989
1990        fn get_hyphen_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
1991            let glyph_id = self.lookup_glyph_index('-' as u32)?;
1992            let advance_units = self.get_horizontal_advance(glyph_id);
1993            let scale_factor = if self.font_metrics.units_per_em > 0 {
1994                font_size / (self.font_metrics.units_per_em as f32)
1995            } else {
1996                return None;
1997            };
1998            let scaled_advance = advance_units as f32 * scale_factor;
1999            Some((glyph_id, scaled_advance))
2000        }
2001
2002        fn get_kashida_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
2003            let glyph_id = self.lookup_glyph_index('\u{0640}' as u32)?;
2004            let advance_units = self.get_horizontal_advance(glyph_id);
2005            let scale_factor = if self.font_metrics.units_per_em > 0 {
2006                font_size / (self.font_metrics.units_per_em as f32)
2007            } else {
2008                return None;
2009            };
2010            let scaled_advance = advance_units as f32 * scale_factor;
2011            Some((glyph_id, scaled_advance))
2012        }
2013
2014        fn has_glyph(&self, codepoint: u32) -> bool {
2015            self.lookup_glyph_index(codepoint).is_some()
2016        }
2017
2018        fn get_vertical_metrics(
2019            &self,
2020            glyph_id: u16,
2021        ) -> Option<crate::text3::cache::VerticalMetrics> {
2022            self.get_vertical_metrics(glyph_id)
2023        }
2024
2025        fn get_font_metrics(&self) -> crate::text3::cache::LayoutFontMetrics {
2026            self.font_metrics.clone()
2027        }
2028
2029        fn num_glyphs(&self) -> u16 {
2030            self.num_glyphs
2031        }
2032
2033        fn get_space_width(&self) -> Option<usize> {
2034            self.space_width
2035        }
2036    }
2037
2038    /// Build an agg-rust PathStorage from an OwnedGlyph outline (in font units, Y-up → Y-down).
2039    ///
2040    /// Returns `None` if the glyph has no outline operations (e.g. space).
2041    /// The caller is responsible for applying scale and translation transforms.
2042    #[cfg(feature = "cpurender")]
2043    pub fn build_glyph_path(glyph: &OwnedGlyph) -> Option<agg_rust::path_storage::PathStorage> {
2044        use agg_rust::{basics::PATH_FLAGS_NONE, path_storage::PathStorage};
2045
2046        let mut path = PathStorage::new();
2047        let mut has_ops = false;
2048        for outline in &glyph.outline {
2049            for op in outline.operations.as_slice() {
2050                has_ops = true;
2051                match op {
2052                    GlyphOutlineOperation::MoveTo(OutlineMoveTo { x, y }) => {
2053                        path.move_to(*x as f64, -(*y as f64));
2054                    }
2055                    GlyphOutlineOperation::LineTo(OutlineLineTo { x, y }) => {
2056                        path.line_to(*x as f64, -(*y as f64));
2057                    }
2058                    GlyphOutlineOperation::QuadraticCurveTo(OutlineQuadTo {
2059                        ctrl_1_x, ctrl_1_y, end_x, end_y,
2060                    }) => {
2061                        path.curve3(
2062                            *ctrl_1_x as f64, -(*ctrl_1_y as f64),
2063                            *end_x as f64, -(*end_y as f64),
2064                        );
2065                    }
2066                    GlyphOutlineOperation::CubicCurveTo(OutlineCubicTo {
2067                        ctrl_1_x, ctrl_1_y, ctrl_2_x, ctrl_2_y, end_x, end_y,
2068                    }) => {
2069                        path.curve4(
2070                            *ctrl_1_x as f64, -(*ctrl_1_y as f64),
2071                            *ctrl_2_x as f64, -(*ctrl_2_y as f64),
2072                            *end_x as f64, -(*end_y as f64),
2073                        );
2074                    }
2075                    GlyphOutlineOperation::ClosePath => {
2076                        path.close_polygon(PATH_FLAGS_NONE);
2077                    }
2078                }
2079            }
2080        }
2081        if !has_ops {
2082            return None;
2083        }
2084        Some(path)
2085    }
2086}