azul_layout/font.rs
1//! Font parsing, metrics extraction, and subsetting.
2//!
3//! This module provides the core font infrastructure for text layout and PDF generation:
4//! - `loading`: System font cache construction and font reload errors
5//! - `mock`: Mock font implementation for testing without real font files
6//! - `parsed`: Full font parsing via allsorts (outlines, metrics, shaping tables, subsetting)
7
8#![cfg(feature = "font_loading")]
9
10use azul_css::{AzString, U8Vec};
11use rust_fontconfig::{FcFontCache, OwnedFontSource};
12
13pub mod loading {
14 #![cfg(feature = "std")]
15 #![cfg(feature = "font_loading")]
16 #![cfg_attr(not(feature = "std"), no_std)]
17
18 use std::io::Error as IoError;
19
20 use azul_css::{AzString, StringVec, U8Vec};
21 use rust_fontconfig::FcFontCache;
22
23 #[cfg(not(miri))]
24 pub fn build_font_cache() -> FcFontCache {
25 FcFontCache::build()
26 }
27
28 #[cfg(miri)]
29 pub fn build_font_cache() -> FcFontCache {
30 FcFontCache::default()
31 }
32
    /// Errors reported when a font cannot be (re)loaded.
    ///
    /// `Clone` is implemented by hand further down because `std::io::Error`
    /// is not `Clone`.
    #[derive(Debug)]
    pub enum FontReloadError {
        /// An I/O error occurred. The `AzString` is printed as the path in
        /// the `Display` message (`Could not load "<path>" - IO error: …`).
        Io(IoError, AzString),
        /// The system font with the given identifier could not be located.
        FontNotFound(AzString),
        /// The font with the given identifier could not be loaded because the
        /// crate was compiled without the `font_loading` feature (as stated
        /// by the `Display` message).
        FontLoadingNotActive(AzString),
    }
39
40 impl Clone for FontReloadError {
41 fn clone(&self) -> Self {
42 use self::FontReloadError::*;
43 match self {
44 Io(err, path) => Io(IoError::new(err.kind(), "Io Error"), path.clone()),
45 FontNotFound(id) => FontNotFound(id.clone()),
46 FontLoadingNotActive(id) => FontLoadingNotActive(id.clone()),
47 }
48 }
49 }
50
51 azul_core::impl_display!(FontReloadError, {
52 Io(err, path_buf) => format!("Could not load \"{}\" - IO error: {}", path_buf.as_str(), err),
53 FontNotFound(id) => format!("Could not locate system font: \"{:?}\" found", id),
54 FontLoadingNotActive(id) => format!("Could not load system font: \"{:?}\": crate was not compiled with --features=\"font_loading\"", id)
55 });
56}
57pub mod mock {
58 //! Mock font implementation for testing text layout.
59 //!
60 //! Provides a `MockFont` that simulates font behavior without requiring
61 //! actual font files, useful for unit testing text layout functionality.
62
63 use std::collections::BTreeMap;
64
65 use crate::text3::cache::LayoutFontMetrics;
66
67 /// A mock font implementation for testing text layout without real fonts.
68 ///
69 /// This allows testing text shaping, layout, and rendering code paths
70 /// without needing to load actual TrueType/OpenType font files.
71 #[derive(Debug, Clone)]
72 pub struct MockFont {
73 /// Font metrics (ascent, descent, etc.).
74 pub font_metrics: LayoutFontMetrics,
75 /// Width of the space character in font units.
76 pub space_width: Option<usize>,
77 /// Horizontal advance widths keyed by glyph ID.
78 pub glyph_advances: BTreeMap<u16, u16>,
79 /// Glyph bounding box sizes (width, height) keyed by glyph ID.
80 pub glyph_sizes: BTreeMap<u16, (i32, i32)>,
81 /// Unicode codepoint to glyph ID mapping.
82 pub glyph_indices: BTreeMap<u32, u16>,
83 }
84
85 impl MockFont {
86 /// Creates a new `MockFont` with the given font metrics.
87 pub fn new(font_metrics: LayoutFontMetrics) -> Self {
88 MockFont {
89 font_metrics,
90 space_width: Some(10),
91 glyph_advances: BTreeMap::new(),
92 glyph_sizes: BTreeMap::new(),
93 glyph_indices: BTreeMap::new(),
94 }
95 }
96
97 /// Sets the space character width.
98 pub fn with_space_width(mut self, width: usize) -> Self {
99 self.space_width = Some(width);
100 self
101 }
102
103 /// Adds a horizontal advance value for a glyph.
104 pub fn with_glyph_advance(mut self, glyph_index: u16, advance: u16) -> Self {
105 self.glyph_advances.insert(glyph_index, advance);
106 self
107 }
108
109 /// Adds a bounding box size for a glyph.
110 pub fn with_glyph_size(mut self, glyph_index: u16, size: (i32, i32)) -> Self {
111 self.glyph_sizes.insert(glyph_index, size);
112 self
113 }
114
115 /// Adds a Unicode codepoint to glyph ID mapping.
116 pub fn with_glyph_index(mut self, unicode: u32, index: u16) -> Self {
117 self.glyph_indices.insert(unicode, index);
118 self
119 }
120 }
121}
122
123pub mod parsed {
124 use core::fmt;
125 use std::{collections::BTreeMap, sync::Arc};
126
127 use allsorts::{
128 binary::read::ReadScope,
129 font_data::FontData,
130 layout::{GDEFTable, LayoutCache, LayoutCacheData, GPOS, GSUB},
131 outline::{OutlineBuilder, OutlineSink},
132 pathfinder_geometry::{line_segment::LineSegment2F, vector::Vector2F},
133 subset::{subset as allsorts_subset, whole_font, CmapTarget, SubsetProfile},
134 tables::{
135 cmap::owned::CmapSubtable as OwnedCmapSubtable,
136 glyf::{
137 Glyph, GlyfVisitorContext, LocaGlyf, Point,
138 VariableGlyfContext, VariableGlyfContextStore,
139 },
140 kern::owned::KernTable,
141 FontTableProvider, HheaTable, MaxpTable,
142 },
143 tag,
144 };
145 use azul_core::resources::{
146 GlyphOutline, GlyphOutlineOperation, OutlineCubicTo, OutlineLineTo, OutlineMoveTo,
147 OutlineQuadTo, OwnedGlyphBoundingBox,
148 };
149 use azul_css::props::basic::FontMetrics as CssFontMetrics;
150
151 // Mock font module for testing
152 pub use crate::font::mock::MockFont;
153 use crate::text3::cache::LayoutFontMetrics;
154
    /// Cached GSUB table for glyph substitution operations.
    ///
    /// Reference-counted (`Arc`) handle to allsorts' `LayoutCacheData<GSUB>`.
    pub type GsubCache = Arc<LayoutCacheData<GSUB>>;
    /// Cached GPOS table for glyph positioning operations.
    ///
    /// Reference-counted (`Arc`) handle to allsorts' `LayoutCacheData<GPOS>`.
    pub type GposCache = Arc<LayoutCacheData<GPOS>>;
159
160 /// Monotonic-clock nanos since process start. Used to timestamp
161 /// `ParsedFont.last_used` for LRU eviction. Cheap (single
162 /// `Instant::now`); resolution is plenty fine for "did this
163 /// face get touched in the last N seconds" decisions. Exposed
164 /// `pub(crate)` so `FontManager::evict_unused` reads from the
165 /// same clock as `last_used` writes.
166 pub(crate) fn monotonic_now_nanos() -> u64 {
167 // Safe: `Instant::elapsed` against the same launch instant is
168 // monotonic and never overflows in any realistic process
169 // lifetime (>500 years).
170 use std::sync::OnceLock;
171 use std::time::Instant;
172 static LAUNCH: OnceLock<Instant> = OnceLock::new();
173 let start = LAUNCH.get_or_init(Instant::now);
174 start.elapsed().as_nanos() as u64
175 }
176
    /// Glyph-outline decoder state. See the
    /// [`ParsedFont::loca_glyf`] field docs for the full description.
    ///
    /// Cloning is cheap: both variants only hold `Arc` handles (plus a
    /// `usize`), so a clone shares the underlying `LocaGlyf` / font bytes.
    #[derive(Clone)]
    pub(crate) enum LocaGlyfState {
        /// Ready to decode immediately, or known to have no outline
        /// data. `None` covers both CFF fonts and fonts where the
        /// loca+glyf parse failed.
        ///
        /// This variant *cannot* be evicted by
        /// [`crate::text3::cache::FontManager::evict_unused`]: there
        /// are no source bytes retained to re-decode from. The eager
        /// `from_bytes` path (tests, `with_source_bytes` PDF callers)
        /// produces this variant.
        Loaded(Option<Arc<std::sync::Mutex<LocaGlyf>>>),
        /// Font bytes retained for lazy `LocaGlyf` construction.
        ///
        /// `loaded` is `Mutex<Option<…>>` (not `OnceLock`) so an
        /// idle eviction can clear it back to `None`; the next
        /// `get_or_decode_glyph` will re-parse from `bytes`. Two-step
        /// double-check pattern in `resolve_loca_glyf` keeps the
        /// expensive `LocaGlyf::load` outside the critical section.
        Deferred {
            /// Shared handle to the complete font file bytes.
            bytes: Arc<rust_fontconfig::FontBytes>,
            /// Index of the face inside `bytes` (relevant for collections).
            font_index: usize,
            /// Lazily filled decoder slot; `None` until the first load and
            /// again after an eviction.
            loaded: Arc<std::sync::Mutex<Option<Arc<std::sync::Mutex<LocaGlyf>>>>>,
        },
    }
204
205 /// Adapter that collects allsorts outline commands into our `GlyphOutline` format.
206 ///
207 /// Implements `OutlineSink` so it can be passed to `GlyfVisitorContext::visit()`.
208 /// This handles composite glyph resolution, transforms, and variable font
209 /// deltas automatically via allsorts internals.
210 struct GlyphOutlineCollector {
211 contours: Vec<GlyphOutline>,
212 current_contour: Vec<GlyphOutlineOperation>,
213 }
214
215 impl GlyphOutlineCollector {
216 fn new() -> Self {
217 Self {
218 contours: Vec::new(),
219 current_contour: Vec::new(),
220 }
221 }
222
223 fn into_outlines(mut self) -> Vec<GlyphOutline> {
224 if !self.current_contour.is_empty() {
225 self.contours.push(GlyphOutline {
226 operations: std::mem::take(&mut self.current_contour).into(),
227 });
228 }
229 self.contours
230 }
231 }
232
233 impl OutlineSink for GlyphOutlineCollector {
234 fn move_to(&mut self, to: Vector2F) {
235 if !self.current_contour.is_empty() {
236 self.contours.push(GlyphOutline {
237 operations: std::mem::take(&mut self.current_contour).into(),
238 });
239 }
240 self.current_contour.push(GlyphOutlineOperation::MoveTo(OutlineMoveTo {
241 x: to.x() as i16,
242 y: to.y() as i16,
243 }));
244 }
245
246 fn line_to(&mut self, to: Vector2F) {
247 self.current_contour.push(GlyphOutlineOperation::LineTo(OutlineLineTo {
248 x: to.x() as i16,
249 y: to.y() as i16,
250 }));
251 }
252
253 fn quadratic_curve_to(&mut self, ctrl: Vector2F, to: Vector2F) {
254 self.current_contour.push(GlyphOutlineOperation::QuadraticCurveTo(
255 OutlineQuadTo {
256 ctrl_1_x: ctrl.x() as i16,
257 ctrl_1_y: ctrl.y() as i16,
258 end_x: to.x() as i16,
259 end_y: to.y() as i16,
260 },
261 ));
262 }
263
264 fn cubic_curve_to(&mut self, ctrl: LineSegment2F, to: Vector2F) {
265 self.current_contour.push(GlyphOutlineOperation::CubicCurveTo(
266 OutlineCubicTo {
267 ctrl_1_x: ctrl.from_x() as i16,
268 ctrl_1_y: ctrl.from_y() as i16,
269 ctrl_2_x: ctrl.to_x() as i16,
270 ctrl_2_y: ctrl.to_y() as i16,
271 end_x: to.x() as i16,
272 end_y: to.y() as i16,
273 },
274 ));
275 }
276
277 fn close(&mut self) {
278 self.current_contour.push(GlyphOutlineOperation::ClosePath);
279 self.contours.push(GlyphOutline {
280 operations: std::mem::take(&mut self.current_contour).into(),
281 });
282 }
283 }
284
    /// Parsed font data with all required tables for text layout and PDF generation.
    ///
    /// This struct holds the parsed representation of a TrueType/OpenType font,
    /// including glyph outlines, metrics, and shaping tables. It's used for:
    /// - Text layout (via GSUB/GPOS tables)
    /// - Glyph rendering (via glyf/CFF outlines)
    /// - PDF font embedding (via font metrics and subsetting)
    ///
    /// Equality (`PartialEq`) compares only [`ParsedFont::hash`]. Cloning
    /// shares the glyph cache, `last_used` and the outline decoder state via
    /// `Arc`, and resets `hint_instance` to `None`.
    pub struct ParsedFont {
        /// Hash of the font bytes for caching and equality checks.
        pub hash: u64,
        /// Layout-specific font metrics (ascent, descent, line gap).
        pub font_metrics: LayoutFontMetrics,
        /// PDF-specific detailed font metrics from HEAD, HHEA, OS/2 tables.
        pub pdf_font_metrics: PdfFontMetrics,
        /// Total number of glyphs in the font (from maxp table).
        pub num_glyphs: u16,
        /// Horizontal header table (hhea) containing global horizontal metrics.
        pub hhea_table: HheaTable,
        /// Offset+length into original_bytes for hmtx table (lazy: no copy).
        /// `(0, 0)` when the table is missing or its offset could not be
        /// derived from the source buffer.
        pub hmtx_range: (usize, usize),
        /// Offset+length into original_bytes for vmtx table (lazy: no copy).
        /// `(0, 0)` when the table is missing or its offset could not be
        /// derived from the source buffer.
        pub vmtx_range: (usize, usize),
        /// Vertical header table (vhea), same format as hhea. None if font has no vertical metrics.
        pub vhea_table: Option<HheaTable>,
        /// Maximum profile table (maxp) containing glyph count and memory hints.
        pub maxp_table: MaxpTable,
        /// Raw GSUB table bytes, kept as a `Vec<u8>` (tens to low-hundreds
        /// of KiB) so the parsed `GsubCache` can be built on first shape
        /// call instead of up-front. Access via [`ParsedFont::gsub`] —
        /// that getter populates `gsub_cache_lazy` via `OnceLock` and
        /// returns a borrow.
        pub(crate) gsub_bytes: Option<Vec<u8>>,
        /// Lazy GSUB cache: populated on first [`ParsedFont::gsub`] call.
        /// `None` means "font has no GSUB table" *after* init attempt;
        /// the `OnceLock` wrapper distinguishes "not yet initialised"
        /// from "initialised to None".
        pub(crate) gsub_cache_lazy: std::sync::OnceLock<Option<GsubCache>>,
        /// Raw GPOS table bytes. Same lazy-parse arrangement as
        /// `gsub_bytes` — see [`ParsedFont::gpos`].
        pub(crate) gpos_bytes: Option<Vec<u8>>,
        /// Lazy GPOS cache, populated on first [`ParsedFont::gpos`] call.
        pub(crate) gpos_cache_lazy: std::sync::OnceLock<Option<GposCache>>,
        /// Glyph definition table (GDEF) for glyph classification.
        pub opt_gdef_table: Option<Arc<GDEFTable>>,
        /// Legacy kerning table (kern) for fonts without GPOS.
        pub opt_kern_table: Option<Arc<KernTable>>,
        /// Monotonic-clock nanos at the most recent
        /// [`ParsedFont::get_or_decode_glyph`] / `gsub()` / `gpos()`
        /// call. `0` means "never touched". Used by
        /// [`crate::text3::cache::FontManager::evict_unused`] to
        /// decide which `LocaGlyfState::Deferred` faces to release.
        pub(crate) last_used: Arc<std::sync::atomic::AtomicU64>,
        /// `true` if this font is a variable font (carries a `gvar`
        /// table). Cached at parse time so [`decode_glyph_inner`]
        /// can short-circuit the variable-context construction for
        /// the common non-variable case. Variable-glyph delta
        /// application requires the source bytes to be retained,
        /// so it only fires on the `LocaGlyfState::Deferred` path.
        pub(crate) is_variable_font: bool,
        /// Lazy outline cache. Populated on first
        /// [`ParsedFont::get_or_decode_glyph`] call per `gid`; entries
        /// are wrapped in `Arc` so callers can hold them without
        /// keeping the lock. The space glyph (and `.notdef` when
        /// present) are pre-inserted by `from_bytes_internal` so the
        /// shaper's cmap-miss path has something to render without
        /// racing with a decode.
        ///
        /// Tests that previously walked the public `glyph_records_decoded`
        /// `BTreeMap` field now call
        /// [`ParsedFont::prime_glyph_cache`] (decodes every glyph into
        /// this cache) followed by
        /// [`ParsedFont::for_each_decoded_glyph`] /
        /// [`ParsedFont::glyph_cache_snapshot`] to walk the result.
        pub(crate) glyph_cache: Arc<std::sync::RwLock<BTreeMap<u16, Arc<OwnedGlyph>>>>,
        /// Glyph outline decoder state.
        ///
        /// - `Loaded(Some(arc))`: `LocaGlyf` is already loaded (owning
        ///   its own `Box<[u8]>` copy of the loca+glyf tables) and
        ///   ready to decode glyphs. Produced by the eager `from_bytes`
        ///   constructor path (tests).
        /// - `Loaded(None)`: the font has no usable loca+glyf (CFF, or
        ///   a parse failure). Glyph outlines won't decode; the hmtx
        ///   advance fallback fills in the blanks.
        /// - `Deferred`: we retain an `Arc<FontBytes>` to the full font
        ///   file and the `font_index`; the first `get_or_decode_glyph`
        ///   call parses a fresh `FontData` / `TableProvider` from those
        ///   bytes and loads `LocaGlyf`, storing the result in the
        ///   `loaded` slot (a `Mutex<Option<…>>`, so eviction can reset
        ///   it to `None`). Fonts that get resolved into a chain but are
        ///   never actually rasterized pay zero decode cost — this is
        ///   the big win for pages like `excel.html` where 20+ fallback
        ///   faces load but only a handful are touched.
        pub(crate) loca_glyf: LocaGlyfState,
        /// Cached width of the space character in font units.
        pub space_width: Option<usize>,
        /// Character-to-glyph mapping (cmap subtable).
        pub cmap_subtable: Option<OwnedCmapSubtable>,
        /// Mock font data for testing (replaces real font behavior).
        pub mock: Option<Box<MockFont>>,
        /// Reverse mapping: glyph_id -> cluster text (handles ligatures like "fi").
        pub reverse_glyph_cache: std::collections::BTreeMap<u16, String>,
        /// Original font bytes — only retained for callers that need to
        /// reconstruct or subset the font (PDF export). Layout / shaping /
        /// raster never read this, so `ParsedFont::from_bytes` leaves it
        /// as `None` by default and callers opt in via
        /// [`ParsedFont::with_source_bytes`]. Shared across faces of the
        /// same `.ttc` via the `Arc<FontBytes>` that
        /// [`rust_fontconfig::FcFontCache::get_font_bytes`] returns —
        /// for disk fonts the backing is an mmap so untouched pages
        /// don't count toward RSS.
        pub original_bytes: Option<std::sync::Arc<rust_fontconfig::FontBytes>>,
        /// Font index within collection (0 for single-font files).
        pub original_index: usize,
        /// GID to CID mapping for CFF fonts (required for PDF embedding).
        pub index_to_cid: BTreeMap<u16, u16>,
        /// Font type (TrueType outlines or OpenType CFF).
        pub font_type: FontType,
        /// PostScript font name from the NAME table.
        pub font_name: Option<String>,
        /// TrueType bytecode hinting instance (mutable interpreter state).
        /// Wrapped in Mutex because hinting mutates internal state.
        /// None for CFF fonts or fonts without hinting data.
        /// Also `None` on every clone, because the `Clone` impl does not
        /// copy the interpreter state.
        pub hint_instance: Option<std::sync::Mutex<allsorts::hinting::HintInstance>>,
    }
408
409 impl Clone for ParsedFont {
410 fn clone(&self) -> Self {
411 ParsedFont {
412 hash: self.hash,
413 font_metrics: self.font_metrics.clone(),
414 pdf_font_metrics: self.pdf_font_metrics,
415 num_glyphs: self.num_glyphs,
416 hhea_table: self.hhea_table.clone(),
417 hmtx_range: self.hmtx_range,
418 vmtx_range: self.vmtx_range,
419 vhea_table: self.vhea_table.clone(),
420 maxp_table: self.maxp_table.clone(),
421 // OnceLock<T: Clone>: Clone preserves the init state, so
422 // a clone of a parsed cache skips re-parse on first
423 // access. The raw bytes we keep around for lazy init
424 // are cloned too.
425 gsub_bytes: self.gsub_bytes.clone(),
426 gsub_cache_lazy: self.gsub_cache_lazy.clone(),
427 gpos_bytes: self.gpos_bytes.clone(),
428 gpos_cache_lazy: self.gpos_cache_lazy.clone(),
429 opt_gdef_table: self.opt_gdef_table.clone(),
430 opt_kern_table: self.opt_kern_table.clone(),
431 // Share the lazy cache and loca_glyf across clones: cheap
432 // Arc bump, amortises glyph decode across clones of the
433 // same face.
434 last_used: Arc::clone(&self.last_used),
435 is_variable_font: self.is_variable_font,
436 glyph_cache: Arc::clone(&self.glyph_cache),
437 // `LocaGlyfState` is `Clone` — for `Loaded` this is an
438 // `Arc::clone`; for `Deferred` it's an `Arc::clone` of
439 // the bytes + the `OnceLock`, so a clone of a face
440 // that's already decoded glyphs carries the decode.
441 loca_glyf: self.loca_glyf.clone(),
442 space_width: self.space_width,
443 cmap_subtable: self.cmap_subtable.clone(),
444 mock: self.mock.clone(),
445 reverse_glyph_cache: self.reverse_glyph_cache.clone(),
446 // Arc clone — O(1), just bumps refcount; no byte copy.
447 original_bytes: self.original_bytes.clone(),
448 original_index: self.original_index,
449 index_to_cid: self.index_to_cid.clone(),
450 font_type: self.font_type.clone(),
451 font_name: self.font_name.clone(),
452 // HintInstance has mutable interpreter state and is not Clone.
453 // Clones are used for PDF/serialization where hinting isn't needed.
454 hint_instance: None,
455 }
456 }
457 }
458
    /// Distinguishes TrueType fonts from OpenType CFF fonts.
    ///
    /// This affects how glyph outlines are extracted and how the font
    /// is embedded in PDF documents.
    ///
    /// Note that `Clone` copies the CFF payload of `OpenTypeCFF`, since it
    /// is stored as an owned `Vec<u8>`.
    #[derive(Debug, Clone, PartialEq)]
    pub enum FontType {
        /// TrueType font with quadratic Bézier outlines in glyf table.
        TrueType,
        /// OpenType font with cubic Bézier outlines in CFF table.
        /// Contains the serialized CFF data for PDF embedding.
        OpenTypeCFF(Vec<u8>),
    }
471
472 /// PDF-specific font metrics from HEAD, HHEA, and OS/2 tables.
473 ///
474 /// These metrics are used for PDF font descriptors and accurate
475 /// text positioning in generated PDF documents.
476 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
477 #[repr(C)]
478 pub struct PdfFontMetrics {
479 // -- HEAD table fields --
480 /// Font units per em-square (typically 1000 or 2048).
481 pub units_per_em: u16,
482 /// Font flags (italic, bold, fixed-pitch, etc.).
483 pub font_flags: u16,
484 /// Minimum x-coordinate across all glyphs.
485 pub x_min: i16,
486 /// Minimum y-coordinate across all glyphs.
487 pub y_min: i16,
488 /// Maximum x-coordinate across all glyphs.
489 pub x_max: i16,
490 /// Maximum y-coordinate across all glyphs.
491 pub y_max: i16,
492
493 // -- HHEA table fields --
494 /// Typographic ascender (distance above baseline).
495 pub ascender: i16,
496 /// Typographic descender (distance below baseline, usually negative).
497 pub descender: i16,
498 /// Recommended line gap between lines of text.
499 pub line_gap: i16,
500 /// Maximum horizontal advance width across all glyphs.
501 pub advance_width_max: u16,
502 /// Caret slope rise for italic angle calculation.
503 pub caret_slope_rise: i16,
504 /// Caret slope run for italic angle calculation.
505 pub caret_slope_run: i16,
506
507 // -- OS/2 table fields (0 if table not present) --
508 /// Average width of lowercase letters.
509 pub x_avg_char_width: i16,
510 /// Visual weight class (100-900, 400=normal, 700=bold).
511 pub us_weight_class: u16,
512 /// Visual width class (1-9, 5=normal).
513 pub us_width_class: u16,
514 /// Thickness of strikeout stroke in font units.
515 pub y_strikeout_size: i16,
516 /// Vertical position of strikeout stroke.
517 pub y_strikeout_position: i16,
518 }
519
520 impl Default for PdfFontMetrics {
521 fn default() -> Self {
522 PdfFontMetrics::zero()
523 }
524 }
525
526 impl PdfFontMetrics {
527 /// Returns zeroed metrics with `units_per_em` set to 1000 (standard PostScript default)
528 /// to avoid division-by-zero in scaling calculations.
529 pub const fn zero() -> Self {
530 PdfFontMetrics {
531 units_per_em: 1000,
532 font_flags: 0,
533 x_min: 0,
534 y_min: 0,
535 x_max: 0,
536 y_max: 0,
537 ascender: 0,
538 descender: 0,
539 line_gap: 0,
540 advance_width_max: 0,
541 caret_slope_rise: 0,
542 caret_slope_run: 0,
543 x_avg_char_width: 0,
544 us_weight_class: 0,
545 us_width_class: 0,
546 y_strikeout_size: 0,
547 y_strikeout_position: 0,
548 }
549 }
550 }
551
552 /// Result of font subsetting operation.
553 ///
554 /// Contains the subsetted font bytes and a mapping from original
555 /// glyph IDs to new glyph IDs in the subset.
556 #[derive(Debug, Clone)]
557 pub struct SubsetFont {
558 /// The subsetted font file bytes (smaller than original).
559 pub bytes: Vec<u8>,
560 /// Mapping: original glyph ID -> (new subset glyph ID, source character).
561 pub glyph_mapping: BTreeMap<u16, (u16, char)>,
562 }
563
564 impl SubsetFont {
565 /// Return the changed text so that when rendering with the subset font (instead of the
566 /// original) the renderer will end up at the same glyph IDs as if we used the original text
567 /// on the original font
568 pub fn subset_text(&self, text: &str) -> String {
569 text.chars()
570 .filter_map(|c| {
571 self.glyph_mapping.values().find_map(|(ngid, ch)| {
572 if *ch == c {
573 char::from_u32(*ngid as u32)
574 } else {
575 None
576 }
577 })
578 })
579 .collect()
580 }
581 }
582
583 /// Hash-based equality: two fonts are considered equal if their content hash matches.
584 /// This is a performance optimization — hash collisions are possible but vanishingly
585 /// unlikely (~1/2^64).
586 impl PartialEq for ParsedFont {
587 fn eq(&self, other: &Self) -> bool {
588 self.hash == other.hash
589 }
590 }
591
592 impl Eq for ParsedFont {}
593
594 const FONT_B64_START: &str = "data:font/ttf;base64,";
595
596 impl serde::Serialize for ParsedFont {
597 fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
598 use base64::Engine;
599 let s = format!(
600 "{FONT_B64_START}{}",
601 base64::prelude::BASE64_STANDARD.encode(&self.to_bytes(None).unwrap_or_default())
602 );
603 s.serialize(serializer)
604 }
605 }
606
607 impl<'de> serde::Deserialize<'de> for ParsedFont {
608 fn deserialize<D: serde::Deserializer<'de>>(
609 deserializer: D,
610 ) -> Result<ParsedFont, D::Error> {
611 use base64::Engine;
612 let s = String::deserialize(deserializer)?;
613 let b64 = if s.starts_with(FONT_B64_START) {
614 let b = &s[FONT_B64_START.len()..];
615 base64::prelude::BASE64_STANDARD.decode(&b).ok()
616 } else {
617 None
618 };
619
620 let mut warnings = Vec::new();
621 ParsedFont::from_bytes(&b64.unwrap_or_default(), 0, &mut warnings).ok_or_else(|| {
622 serde::de::Error::custom(format!("Font deserialization error: {warnings:?}"))
623 })
624 }
625 }
626
627 impl fmt::Debug for ParsedFont {
628 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
629 f.debug_struct("ParsedFont")
630 .field("hash", &self.hash)
631 .field("font_metrics", &self.font_metrics)
632 .field("num_glyphs", &self.num_glyphs)
633 .field("hhea_table", &self.hhea_table)
634 .field(
635 "hmtx_range",
636 &format_args!("<{} bytes>", self.hmtx_range.1),
637 )
638 .field("maxp_table", &self.maxp_table)
639 .field(
640 "glyph_cache",
641 &format_args!(
642 "{} entries (lazy)",
643 self.glyph_cache.read().map(|m| m.len()).unwrap_or(0),
644 ),
645 )
646 .field("space_width", &self.space_width)
647 .field("cmap_subtable", &self.cmap_subtable)
648 .finish()
649 }
650 }
651
652 /// Warning or error message generated during font parsing.
653 #[derive(Debug, Clone, PartialEq, Eq)]
654 pub struct FontParseWarning {
655 /// Severity level of this warning.
656 pub severity: FontParseWarningSeverity,
657 /// Human-readable description of the issue.
658 pub message: String,
659 }
660
661 /// Severity level for font parsing warnings.
662 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
663 pub enum FontParseWarningSeverity {
664 /// Informational message (not an error).
665 Info,
666 /// Warning that may affect font rendering.
667 Warning,
668 /// Error that prevents proper font usage.
669 Error,
670 }
671
672 impl FontParseWarning {
673 /// Creates an info-level message.
674 pub fn info(message: String) -> Self {
675 Self {
676 severity: FontParseWarningSeverity::Info,
677 message,
678 }
679 }
680
681 /// Creates a warning-level message.
682 pub fn warning(message: String) -> Self {
683 Self {
684 severity: FontParseWarningSeverity::Warning,
685 message,
686 }
687 }
688
689 /// Creates an error-level message.
690 pub fn error(message: String) -> Self {
691 Self {
692 severity: FontParseWarningSeverity::Error,
693 message,
694 }
695 }
696 }
697
698 impl ParsedFont {
699 /// Parse a font from bytes using allsorts
700 ///
701 /// # Arguments
702 /// * `font_bytes` - The font file data
703 /// * `font_index` - Index of the font in a font collection (0 for single fonts)
704 /// * `warnings` - Optional vector to collect parsing warnings
705 ///
706 /// # Returns
707 /// `Some(ParsedFont)` if parsing succeeds, `None` otherwise
708 ///
709 /// Note: Outlines are decoded lazily by `get_or_decode_glyph`;
710 /// `LocaGlyf::load` runs eagerly here. Use `from_bytes_shared`
711 /// for the lazy-LocaGlyf production path.
712 pub fn from_bytes(
713 font_bytes: &[u8],
714 font_index: usize,
715 warnings: &mut Vec<FontParseWarning>,
716 ) -> Option<Self> {
717 // `from_bytes` keeps the eager-LocaGlyf behaviour for the
718 // small number of callers (mainly tests) that don't have
719 // an `Arc<[u8]>` to keep alive for the lazy path.
720 Self::from_bytes_internal(font_bytes, font_index, warnings, false)
721 }
722
723 /// Shared implementation of `from_bytes` / `from_bytes_shared`.
724 ///
725 /// `defer_loca_glyf = true` skips the `LocaGlyf::load` call
726 /// here so the caller (`from_bytes_shared`) can install a
727 /// `LocaGlyfState::Deferred` slot that re-parses on first
728 /// glyph decode. Saves the load-then-drop cycle the previous
729 /// arrangement paid (`from_bytes_shared` used to call
730 /// `from_bytes` and immediately replace the loaded LocaGlyf
731 /// with a Deferred slot, throwing away ~hundreds of KiB of
732 /// loca+glyf bytes per face for fonts in the chain that get
733 /// loaded but never rasterized).
734 fn from_bytes_internal(
735 font_bytes: &[u8],
736 font_index: usize,
737 warnings: &mut Vec<FontParseWarning>,
738 defer_loca_glyf: bool,
739 ) -> Option<Self> {
740 use std::{
741 collections::hash_map::DefaultHasher,
742 hash::{Hash, Hasher},
743 };
744
745 use allsorts::{
746 binary::read::ReadScope,
747 font_data::FontData,
748 tables::{
749 cmap::{owned::CmapSubtable as OwnedCmapSubtable, CmapSubtable},
750 FontTableProvider, HeadTable, HheaTable, MaxpTable,
751 },
752 tag,
753 };
754
755 let scope = ReadScope::new(font_bytes);
756 let font_file = match scope.read::<FontData<'_>>() {
757 Ok(ff) => ff,
758 Err(e) => {
759 warnings.push(FontParseWarning::error(format!(
760 "Failed to read font data: {}",
761 e
762 )));
763 return None;
764 }
765 };
766 let provider = match font_file.table_provider(font_index) {
767 Ok(p) => p,
768 Err(e) => {
769 warnings.push(FontParseWarning::error(format!(
770 "Failed to get table provider for font index {}: {}",
771 font_index, e
772 )));
773 return None;
774 }
775 };
776
777 // Extract font name from NAME table early (before provider is moved)
778 let font_name = provider.table_data(tag::NAME).ok().and_then(|name_data| {
779 ReadScope::new(&name_data?)
780 .read::<allsorts::tables::NameTable>()
781 .ok()
782 .and_then(|name_table| {
783 name_table.string_for_id(allsorts::tables::NameTable::POSTSCRIPT_NAME)
784 })
785 });
786
787 let head_table = provider
788 .table_data(tag::HEAD)
789 .ok()
790 .and_then(|head_data| ReadScope::new(&head_data?).read::<HeadTable>().ok())?;
791
792 let maxp_table = provider
793 .table_data(tag::MAXP)
794 .ok()
795 .and_then(|maxp_data| ReadScope::new(&maxp_data?).read::<MaxpTable>().ok())
796 .unwrap_or(MaxpTable {
797 num_glyphs: 0,
798 version1_sub_table: None,
799 });
800
801 let num_glyphs = maxp_table.num_glyphs as usize;
802
803 // Compute byte offset+length into font_bytes for hmtx/vmtx
804 // instead of copying the table data. The provider returns a
805 // borrowed slice for OpenType fonts, so we can derive the
806 // offset via pointer arithmetic.
807 let hmtx_range = provider
808 .table_data(tag::HMTX)
809 .ok()
810 .and_then(|cow_opt| {
811 let cow = cow_opt?;
812 match cow {
813 std::borrow::Cow::Borrowed(slice) => {
814 let base = font_bytes.as_ptr() as usize;
815 let ptr = slice.as_ptr() as usize;
816 let offset = ptr.checked_sub(base)?;
817 if offset + slice.len() <= font_bytes.len() {
818 Some((offset, slice.len()))
819 } else {
820 None
821 }
822 }
823 std::borrow::Cow::Owned(_) => None,
824 }
825 })
826 .unwrap_or((0, 0));
827
828 let vmtx_range = provider
829 .table_data(tag::VMTX)
830 .ok()
831 .and_then(|s| {
832 let slice = s?;
833 let base = font_bytes.as_ptr() as usize;
834 let ptr = slice.as_ptr() as usize;
835 let offset = ptr.checked_sub(base)?;
836 if offset + slice.len() <= font_bytes.len() {
837 Some((offset, slice.len()))
838 } else {
839 None
840 }
841 })
842 .unwrap_or((0, 0));
843
844 // Parse vhea table (same format as hhea, used for vertical metrics)
845 let vhea_table = provider
846 .table_data(tag::VHEA)
847 .ok()
848 .and_then(|vhea_data| ReadScope::new(&vhea_data?).read::<HheaTable>().ok());
849
850 // hhea is required per the OpenType spec; return None if missing
851 let hhea_table = provider
852 .table_data(tag::HHEA)
853 .ok()
854 .and_then(|hhea_data| ReadScope::new(&hhea_data?).read::<HheaTable>().ok())?;
855
856 // Build layout-specific font metrics
857 let font_metrics = LayoutFontMetrics {
858 units_per_em: if head_table.units_per_em == 0 {
859 1000
860 } else {
861 head_table.units_per_em
862 },
863 ascent: hhea_table.ascender as f32,
864 descent: hhea_table.descender as f32,
865 line_gap: hhea_table.line_gap as f32,
866 x_height: None, // will be populated from OS/2 table via from_font_metrics if available
867 cap_height: None,
868 };
869
870 // Build PDF-specific font metrics
871 let pdf_font_metrics =
872 Self::parse_pdf_font_metrics(font_bytes, font_index, &head_table, &hhea_table);
873
874 // Use allsorts LocaGlyf for on-demand outline extraction. We
875 // *load* LocaGlyf eagerly (it owns ~tens of KiB of loca +
876 // ~hundreds of KiB of glyf bytes) but we *don't* decode any
877 // glyph outlines up front — that's the big RSS win. Glyphs
878 // are decoded by `ParsedFont::get_or_decode_glyph` on first
879 // access from the CPU/GPU rasterizer.
880 //
881 // When `defer_loca_glyf` is set (production lazy path via
882 // `from_bytes_shared`), we skip `LocaGlyf::load` here too —
883 // the caller will overwrite the slot with
884 // `LocaGlyfState::Deferred` carrying the source bytes
885 // `Arc<[u8]>`, and the load happens on the first
886 // `get_or_decode_glyph` call. This avoids parsing
887 // ~hundreds of KiB per face for fonts that get resolved
888 // into a chain but never actually rasterized (typical
889 // for fallback fonts in CSS chains).
890 let has_glyf = provider.has_table(tag::GLYF) && provider.has_table(tag::LOCA);
891 // Cache `has_gvar` before `provider` gets moved into
892 // `allsorts::font::Font::new(provider)` further down —
893 // it's the cheapest way to detect a variable font and
894 // avoids the borrow-after-move that a later
895 // `provider.has_table(tag::GVAR)` would incur.
896 let has_gvar = provider.has_table(tag::GVAR);
897 let loca_glyf_opt: Option<Arc<std::sync::Mutex<LocaGlyf>>> = if has_glyf
898 && !defer_loca_glyf
899 {
900 match LocaGlyf::load(&provider) {
901 Ok(lg) => Some(Arc::new(std::sync::Mutex::new(lg))),
902 Err(e) => {
903 warnings.push(FontParseWarning::warning(format!(
904 "Failed to load LocaGlyf: {} — falling back to hmtx-only", e
905 )));
906 None
907 }
908 }
909 } else {
910 None
911 };
912
913 // Lazy `glyph_cache` starts empty; the space-glyph stub
914 // below pre-inserts gid 0 / space so the shaper's
915 // cmap-miss fallback has something to render without
916 // racing with a decode.
917
918 let mut font_data_impl = allsorts::font::Font::new(provider).ok()?;
919
920 // Create TrueType hinting instance from font tables
921 let hint_instance = allsorts::hinting::HintInstance::new(
922 &font_data_impl.font_table_provider
923 ).ok().flatten().map(|h| std::sync::Mutex::new(h));
924
925 // Stash raw GSUB/GPOS bytes for lazy parse. Typical fonts
926 // have ~tens of KiB of GSUB + a few-to-tens of KiB of GPOS —
927 // dwarfed by glyph outlines — so we keep the bytes around
928 // and only spend `LayoutTable::read` + `new_layout_cache`
929 // cycles when the shaper actually needs them (via
930 // `ParsedFont::gsub` / `::gpos`). For an ASCII run where no
931 // substitution / kerning is required, we skip both entirely.
932 let gsub_bytes = font_data_impl
933 .font_table_provider
934 .table_data(tag::GSUB)
935 .ok()
936 .flatten()
937 .map(|c| c.into_owned());
938 let gpos_bytes = font_data_impl
939 .font_table_provider
940 .table_data(tag::GPOS)
941 .ok()
942 .flatten()
943 .map(|c| c.into_owned());
944 let opt_gdef_table = font_data_impl.gdef_table().ok().and_then(|o| o);
945 let num_glyphs = font_data_impl.num_glyphs();
946
947 let opt_kern_table = font_data_impl
948 .kern_table()
949 .ok()
950 .and_then(|s| Some(s?.to_owned()));
951
952 let cmap_data = font_data_impl.cmap_subtable_data();
953 let cmap_subtable = ReadScope::new(cmap_data);
954 let cmap_subtable = cmap_subtable
955 .read::<CmapSubtable<'_>>()
956 .ok()
957 .and_then(|s| s.to_owned());
958
959 // Font identity hash — used by `PartialEq` for ParsedFont.
960 //
961 // Previously we did `font_bytes.hash(&mut hasher)` over
962 // the full mmap. That touched every page of the file
963 // (a 40 MiB `.ttc` walked byte-for-byte) so the "lazy
964 // mmap" ended up *fully resident* the moment we built
965 // a `ParsedFont`. Cold RSS jumped ~40 MiB from this
966 // single line.
967 //
968 // The hash doesn't need to be cryptographic — it just
969 // has to disambiguate two `ParsedFont`s. `(len, first
970 // 4 KiB, last 4 KiB, font_index)` is plenty unique and
971 // only faults in the two header / trailer pages, which
972 // shaping is going to need anyway.
973 let mut hasher = DefaultHasher::new();
974 (font_bytes.len() as u64).hash(&mut hasher);
975 let head_len = font_bytes.len().min(4096);
976 font_bytes[..head_len].hash(&mut hasher);
977 let tail_start = font_bytes.len().saturating_sub(4096);
978 font_bytes[tail_start..].hash(&mut hasher);
979 font_index.hash(&mut hasher);
980 let hash = hasher.finish();
981
982 let mut font = ParsedFont {
983 hash,
984 font_metrics,
985 pdf_font_metrics,
986 num_glyphs,
987 hhea_table,
988 hmtx_range,
989 vmtx_range,
990 vhea_table,
991 maxp_table,
992 gsub_bytes,
993 gsub_cache_lazy: std::sync::OnceLock::new(),
994 gpos_bytes,
995 gpos_cache_lazy: std::sync::OnceLock::new(),
996 opt_gdef_table,
997 opt_kern_table,
998 cmap_subtable,
999 last_used: Arc::new(std::sync::atomic::AtomicU64::new(0)),
1000 is_variable_font: has_gvar,
1001 glyph_cache: Arc::new(std::sync::RwLock::new(BTreeMap::new())),
1002 // Eager path: `from_bytes` loaded LocaGlyf immediately
1003 // (or set None if the font has no loca+glyf). Lazy
1004 // callers use `from_bytes_shared` which replaces this
1005 // with `LocaGlyfState::Deferred` before returning.
1006 loca_glyf: LocaGlyfState::Loaded(loca_glyf_opt),
1007 space_width: None,
1008 mock: None,
1009 reverse_glyph_cache: BTreeMap::new(),
1010 // Don't retain the source bytes by default — layout and
1011 // raster don't need them. PDF subsetting / `to_bytes`
1012 // callers opt in via `with_source_bytes`.
1013 original_bytes: None,
1014 original_index: font_index,
1015 index_to_cid: BTreeMap::new(), // Will be filled for CFF fonts
1016 font_type: FontType::TrueType, // Default, will be updated if CFF
1017 font_name,
1018 hint_instance,
1019 };
1020
1021 // Calculate space width
1022 let space_width = font.get_space_width_internal();
1023
1024 // Pre-decode the space glyph straight into the lazy
1025 // `glyph_cache`. Space typically has no outline, so the
1026 // decoder's outline visitor returns nothing useful and
1027 // we'd spin re-decoding it every shape — short-circuit
1028 // here with a hand-rolled record carrying the hmtx
1029 // advance.
1030 let _ = (|| {
1031 let space_gid = font.lookup_glyph_index(' ' as u32)?;
1032 if let Ok(cache) = font.glyph_cache.read() {
1033 if cache.contains_key(&space_gid) {
1034 return None;
1035 }
1036 }
1037 let space_width_val = space_width?;
1038 let space_record = OwnedGlyph {
1039 bounding_box: OwnedGlyphBoundingBox {
1040 max_x: 0,
1041 max_y: 0,
1042 min_x: 0,
1043 min_y: 0,
1044 },
1045 horz_advance: space_width_val as u16,
1046 outline: Vec::new(),
1047 phantom_points: None,
1048 raw_points: None,
1049 raw_on_curve: None,
1050 raw_contour_ends: None,
1051 instructions: None,
1052 };
1053 if let Ok(mut cache) = font.glyph_cache.write() {
1054 cache.insert(space_gid, Arc::new(space_record));
1055 }
1056 Some(())
1057 })();
1058
1059 font.space_width = space_width;
1060
1061 Some(font)
1062 }
1063
1064 /// Attach the source font bytes to this `ParsedFont`, enabling
1065 /// [`ParsedFont::to_bytes`] and [`ParsedFont::subset`] (both of
1066 /// which the layout / shaping path never calls).
1067 ///
1068 /// Takes an `Arc<FontBytes>` so the same file's bytes can be
1069 /// shared across every face of a `.ttc` at zero extra cost —
1070 /// pair with [`rust_fontconfig::FcFontCache::get_font_bytes`].
1071 /// For ad-hoc PDF callers that have raw heap bytes, wrap them
1072 /// via `Arc::new(FontBytes::Owned(Arc::from(vec)))`.
1073 pub fn with_source_bytes(mut self, bytes: std::sync::Arc<rust_fontconfig::FontBytes>) -> Self {
1074 self.original_bytes = Some(bytes);
1075 self
1076 }
1077
1078 /// Lazy-friendly constructor — identical to
1079 /// [`ParsedFont::from_bytes`] except that `LocaGlyf` is
1080 /// **not** loaded during the call. Instead, the supplied
1081 /// `Arc<[u8]>` is retained and `LocaGlyf::load` runs the first
1082 /// time [`get_or_decode_glyph`] needs glyph outlines for this
1083 /// face.
1084 ///
1085 /// Fonts that get resolved into a CSS fallback chain but are
1086 /// never actually rasterized (common on desktop — e.g. every
1087 /// face of HelveticaNeue.ttc loads, but only one or two are
1088 /// shaped) then pay zero loca/glyf cost.
1089 ///
1090 /// Production callers (the reftest harness, `LayoutWindow`,
1091 /// `cpurender`) should prefer this constructor. Tests that
1092 /// inspect `glyph_records_decoded` directly and don't want
1093 /// a lazy path keep using `from_bytes`.
1094 pub fn from_bytes_shared(
1095 bytes: std::sync::Arc<rust_fontconfig::FontBytes>,
1096 font_index: usize,
1097 warnings: &mut Vec<FontParseWarning>,
1098 ) -> Option<Self> {
1099 // Skip the eager LocaGlyf::load via `defer_loca_glyf=true`
1100 // — saves the load-then-drop cycle the prior arrangement
1101 // paid (when this called `from_bytes`, allocated
1102 // ~hundreds of KiB of loca+glyf bytes, then immediately
1103 // replaced the slot with `Deferred` and dropped them).
1104 // `bytes.as_ref()` derefs FontBytes → &[u8] (mmap or owned
1105 // — same code path).
1106 let mut font = Self::from_bytes_internal(bytes.as_ref(), font_index, warnings, true)?;
1107 font.original_bytes = Some(bytes.clone());
1108 font.loca_glyf = LocaGlyfState::Deferred {
1109 bytes,
1110 font_index,
1111 loaded: Arc::new(std::sync::Mutex::new(None)),
1112 };
1113 Some(font)
1114 }
1115
1116 /// Resolve the current face's `LocaGlyf`, loading it lazily
1117 /// on first call when `loca_glyf` is `Deferred`. Returns
1118 /// `None` when the font has no usable loca+glyf (CFF fonts
1119 /// or parse failures).
1120 fn resolve_loca_glyf(&self) -> Option<Arc<std::sync::Mutex<LocaGlyf>>> {
1121 match &self.loca_glyf {
1122 LocaGlyfState::Loaded(inner) => inner.clone(),
1123 LocaGlyfState::Deferred { bytes, font_index, loaded } => {
1124 // Fast path: cached LocaGlyf is present.
1125 if let Ok(guard) = loaded.lock() {
1126 if let Some(arc) = guard.as_ref() {
1127 return Some(Arc::clone(arc));
1128 }
1129 }
1130 let _p = crate::probe::Probe::span("resolve_loca_glyf");
1131
1132 // Slow path: parse provider + load LocaGlyf without
1133 // holding the slot's lock (allsorts can take a
1134 // millisecond or two on a fresh load). Re-check
1135 // after acquiring the write lock so a parallel
1136 // decoder doesn't double-load.
1137 use allsorts::{
1138 binary::read::ReadScope,
1139 font_data::FontData,
1140 tables::FontTableProvider,
1141 };
1142 let scope = ReadScope::new(bytes.as_slice());
1143 let font_data = scope.read::<FontData<'_>>().ok()?;
1144 let provider = font_data.table_provider(*font_index).ok()?;
1145 // Gate on table presence to match the `from_bytes`
1146 // has_glyf check; avoids a spurious warning on
1147 // CFF fonts that sneak into the Deferred path.
1148 if !provider.has_table(tag::GLYF) || !provider.has_table(tag::LOCA) {
1149 return None;
1150 }
1151 let new_arc = LocaGlyf::load(&provider)
1152 .ok()
1153 .map(|lg| Arc::new(std::sync::Mutex::new(lg)))?;
1154
1155 if let Ok(mut guard) = loaded.lock() {
1156 if let Some(existing) = guard.as_ref() {
1157 return Some(Arc::clone(existing));
1158 }
1159 *guard = Some(Arc::clone(&new_arc));
1160 }
1161 Some(new_arc)
1162 }
1163 }
1164 }
1165
1166 /// Source bytes for PDF subsetting / table extraction.
1167 ///
1168 /// Looks in two places:
1169 /// - `original_bytes` (set by [`ParsedFont::with_source_bytes`]
1170 /// for legacy PDF-first construction).
1171 /// - `LocaGlyfState::Deferred.bytes` (set by
1172 /// [`ParsedFont::from_bytes_shared`] — the production lazy
1173 /// path, which already retains an `Arc<[u8]>` for the lazy
1174 /// loca/glyf loader).
1175 ///
1176 /// Returns `None` only for `ParsedFont`s built via the eager
1177 /// `from_bytes` path without an explicit `with_source_bytes`
1178 /// call — i.e. unit tests that load a font and don't touch
1179 /// PDF.
1180 pub fn source_bytes_for_subset(&self) -> Option<std::sync::Arc<rust_fontconfig::FontBytes>> {
1181 if let Some(bytes) = &self.original_bytes {
1182 return Some(std::sync::Arc::clone(bytes));
1183 }
1184 if let LocaGlyfState::Deferred { bytes, .. } = &self.loca_glyf {
1185 return Some(std::sync::Arc::clone(bytes));
1186 }
1187 None
1188 }
1189
1190 /// Read the monotonic-clock nanos timestamp of the most
1191 /// recent [`get_or_decode_glyph`] call on this face, or `0`
1192 /// if it's never been touched.
1193 pub fn last_used_nanos(&self) -> u64 {
1194 self.last_used.load(std::sync::atomic::Ordering::Relaxed)
1195 }
1196
1197 /// Drop the cached `LocaGlyf` for this face if it's
1198 /// `Deferred`-with-bytes-retained — so the next
1199 /// [`get_or_decode_glyph`] re-parses from `bytes`. No-op for
1200 /// `Loaded` faces (no source bytes to fall back to).
1201 ///
1202 /// Used by [`crate::text3::cache::FontManager::evict_unused`]
1203 /// and exposed publicly so embedders can free memory under
1204 /// pressure on fonts they no longer need to render.
1205 pub fn evict_loca_glyf(&self) -> bool {
1206 match &self.loca_glyf {
1207 LocaGlyfState::Deferred { loaded, .. } => {
1208 if let Ok(mut guard) = loaded.lock() {
1209 if guard.is_some() {
1210 *guard = None;
1211 return true;
1212 }
1213 }
1214 false
1215 }
1216 LocaGlyfState::Loaded(_) => false,
1217 }
1218 }
1219
1220 /// Fetch the parsed GSUB cache if this font has one, parsing
1221 /// it from the retained `gsub_bytes` on first access.
1222 ///
1223 /// Moved out of the eager `from_bytes` path because most text
1224 /// runs never trigger GSUB — plain ASCII without ligatures is
1225 /// handled entirely by the cmap + hmtx fast path. Building
1226 /// `LayoutCacheData<GSUB>` up front reserved ~0.5–2 MiB per
1227 /// face just to throw it away on pages that don't shape
1228 /// complex scripts.
1229 pub fn gsub(&self) -> Option<&GsubCache> {
1230 self.gsub_cache_lazy
1231 .get_or_init(|| {
1232 use allsorts::{
1233 binary::read::ReadScope,
1234 layout::{new_layout_cache, LayoutTable, GSUB},
1235 };
1236 let bytes = self.gsub_bytes.as_ref()?;
1237 ReadScope::new(bytes)
1238 .read::<LayoutTable<GSUB>>()
1239 .ok()
1240 .map(new_layout_cache)
1241 })
1242 .as_ref()
1243 }
1244
1245 /// Fetch the parsed GPOS cache if this font has one, parsing
1246 /// it from the retained `gpos_bytes` on first access. See
1247 /// [`ParsedFont::gsub`] for the motivation.
1248 pub fn gpos(&self) -> Option<&GposCache> {
1249 self.gpos_cache_lazy
1250 .get_or_init(|| {
1251 use allsorts::{
1252 binary::read::ReadScope,
1253 layout::{new_layout_cache, LayoutTable, GPOS},
1254 };
1255 let bytes = self.gpos_bytes.as_ref()?;
1256 ReadScope::new(bytes)
1257 .read::<LayoutTable<GPOS>>()
1258 .ok()
1259 .map(new_layout_cache)
1260 })
1261 .as_ref()
1262 }
1263
1264 /// Fetch an `OwnedGlyph` for `gid`, decoding it on first access.
1265 ///
1266 /// Cached in the `Arc<RwLock<…>>` `glyph_cache` so subsequent
1267 /// calls (including across clones of this `ParsedFont`) hit the
1268 /// cache. Returns `None` when `gid >= num_glyphs` or the font
1269 /// has no loca+glyf and no hmtx entry for the glyph. For CFF
1270 /// fonts the returned record has an empty outline and an advance
1271 /// pulled from hmtx — matching the pre-lazy behaviour.
1272 ///
1273 /// Called on the rasterizer hot path; performance budget is a
1274 /// few µs per unique glyph (first hit) and an Arc bump + BTreeMap
1275 /// lookup (cache hits). The write lock is held only across the
1276 /// decode, not across the caller's use of the returned Arc.
1277 pub fn get_or_decode_glyph(&self, gid: u16) -> Option<std::sync::Arc<OwnedGlyph>> {
1278 use std::sync::Arc;
1279 if usize::from(gid) >= self.num_glyphs.min(u16::MAX) as usize {
1280 return None;
1281 }
1282 // Bump the LRU timestamp so `FontManager::evict_unused`
1283 // can tell this face is still in use. Cheap atomic store
1284 // (Relaxed — eviction reads the same atomic and tolerates
1285 // a slightly stale value, which only causes "evict, then
1286 // re-load on next access" — never an incorrect render).
1287 self.last_used
1288 .store(monotonic_now_nanos(), std::sync::atomic::Ordering::Relaxed);
1289
1290 // Fast path: cache hit.
1291 if let Ok(cache) = self.glyph_cache.read() {
1292 if let Some(existing) = cache.get(&gid) {
1293 return Some(Arc::clone(existing));
1294 }
1295 }
1296
1297 // Miss: decode. We drop the read lock before taking the
1298 // write lock to avoid deadlock, and we re-check on the way
1299 // in because another thread may have decoded the same glyph
1300 // in between.
1301 let record = self.decode_glyph_inner(gid);
1302 let arc = Arc::new(record);
1303 if let Ok(mut cache) = self.glyph_cache.write() {
1304 cache
1305 .entry(gid)
1306 .or_insert_with(|| Arc::clone(&arc));
1307 // If another thread beat us to the insert, return theirs
1308 // so all callers observe the same Arc.
1309 if let Some(winner) = cache.get(&gid) {
1310 return Some(Arc::clone(winner));
1311 }
1312 }
1313 Some(arc)
1314 }
1315
1316 /// Eagerly decode every glyph into the lazy `glyph_cache`,
1317 /// restoring the pre-lazy "every glyph is materialised at
1318 /// construction time" behaviour. Used by tests that iterate
1319 /// or compare against reference tooling, and by embedders
1320 /// that want a walkable view without driving every shape
1321 /// through `get_or_decode_glyph`.
1322 ///
1323 /// After `prime_glyph_cache`, callers can use
1324 /// [`ParsedFont::for_each_decoded_glyph`] or
1325 /// [`ParsedFont::glyph_cache_snapshot`] to observe the
1326 /// populated cache.
1327 pub fn prime_glyph_cache(&mut self) {
1328 let n = self.num_glyphs.min(u16::MAX) as usize;
1329 for glyph_index in 0..n {
1330 let gid = glyph_index as u16;
1331 let _ = self.get_or_decode_glyph(gid);
1332 }
1333 }
1334
1335 /// Walk every entry currently in the lazy `glyph_cache`,
1336 /// invoking `f(gid, &OwnedGlyph)` for each. Holds a read
1337 /// lock for the duration; do not call back into the font
1338 /// from `f`. The cache is populated on demand by
1339 /// [`ParsedFont::get_or_decode_glyph`] (and bulk-prefilled
1340 /// by [`ParsedFont::prime_glyph_cache`]).
1341 pub fn for_each_decoded_glyph<F: FnMut(u16, &OwnedGlyph)>(&self, mut f: F) {
1342 if let Ok(cache) = self.glyph_cache.read() {
1343 for (gid, glyph) in cache.iter() {
1344 f(*gid, glyph.as_ref());
1345 }
1346 }
1347 }
1348
1349 /// Snapshot of the currently-decoded glyphs as a
1350 /// `BTreeMap<u16, Arc<OwnedGlyph>>`. Cheap (clones the
1351 /// Arcs, not the records). Used by callers that want to
1352 /// hand the map off across an API boundary; for in-place
1353 /// iteration prefer [`ParsedFont::for_each_decoded_glyph`].
1354 pub fn glyph_cache_snapshot(&self) -> BTreeMap<u16, Arc<OwnedGlyph>> {
1355 self.glyph_cache
1356 .read()
1357 .map(|c| c.clone())
1358 .unwrap_or_default()
1359 }
1360
1361 /// Core decode routine: produces one `OwnedGlyph` for `gid` by
1362 /// locking `loca_glyf` and running allsorts' outline visitor +
1363 /// raw-simple-glyph extraction. Factored out so both
1364 /// [`get_or_decode_glyph`] and [`prime_glyph_cache`] share it.
1365 ///
1366 /// Always returns an `OwnedGlyph` — if anything in the decode
1367 /// chain fails, falls back to an empty-outline record with the
1368 /// `hmtx` advance. This mirrors the pre-lazy behaviour where
1369 /// every gid ended up in `glyph_records_decoded`.
1370 fn hmtx_bytes(&self) -> &[u8] {
1371 let (off, len) = self.hmtx_range;
1372 if len == 0 { return &[]; }
1373 self.original_bytes.as_ref()
1374 .map(|b| &b.as_ref()[off..off+len])
1375 .unwrap_or(&[])
1376 }
1377
1378 fn vmtx_bytes(&self) -> &[u8] {
1379 let (off, len) = self.vmtx_range;
1380 if len == 0 { return &[]; }
1381 self.original_bytes.as_ref()
1382 .map(|b| &b.as_ref()[off..off+len])
1383 .unwrap_or(&[])
1384 }
1385
/// Core decode routine: builds one `OwnedGlyph` for `gid`.
///
/// Always returns a record. It starts from a fallback record with
/// an empty outline, the `hmtx` advance (`0` when that lookup
/// fails) and a bounding box of `(0, 0)..(advance, 0)`, then
/// upgrades it step by step:
///
/// 1. resolve and lock the face's `LocaGlyf` — on failure the
///    fallback record is returned as is;
/// 2. visit the outline, through a variable-font context when the
///    face is variable and in the `Deferred` state, otherwise (or
///    if that attempt fails) through the plain visitor;
/// 3. for simple glyphs, copy the raw points, on-curve flags,
///    contour end indices and instructions used for hinting.
///
/// Called by [`ParsedFont::get_or_decode_glyph`] on a cache miss.
fn decode_glyph_inner(&self, gid: u16) -> OwnedGlyph {
    let _p = crate::probe::Probe::span("decode_glyph");
    // Advance straight from hmtx; an error collapses to 0.
    let horz_advance = allsorts::glyph_info::advance(
        &self.maxp_table,
        &self.hhea_table,
        self.hmtx_bytes(),
        gid,
    )
    .unwrap_or_default();

    // Fallback record, returned unchanged on any early exit below.
    let mut record = OwnedGlyph {
        horz_advance,
        bounding_box: OwnedGlyphBoundingBox {
            min_x: 0,
            min_y: 0,
            max_x: horz_advance as i16,
            max_y: 0,
        },
        outline: Vec::new(),
        phantom_points: None,
        raw_points: None,
        raw_on_curve: None,
        raw_contour_ends: None,
        instructions: None,
    };

    // Resolve the `LocaGlyf` for this face. For `Loaded` that's
    // a cheap `Arc::clone`; for `Deferred` this is where the
    // actual `LocaGlyf::load` happens on first access, paid once
    // per face that ever decodes a glyph.
    let Some(loca_glyf_arc) = self.resolve_loca_glyf() else {
        return record;
    };
    let Ok(mut loca_glyf) = loca_glyf_arc.lock() else {
        return record;
    };

    // Visit the outline. If this is a variable font (gvar
    // table present) AND the face is in the
    // `LocaGlyfState::Deferred` state (which carries the source
    // bytes), we re-derive a `VariableGlyfContext` here so
    // default-instance vs designed-instance differences land in
    // the decoded outline. The chained `if let` pattern
    // keeps `provider` and `store` in scope for the
    // visit, which the borrow checker requires (the
    // store's `Cow::Borrowed(&[u8])` tables tie its
    // lifetime to the provider).
    //
    // `Loaded` faces and non-variable fonts skip the
    // var-context machinery and decode the default instance —
    // same behaviour as before R4.
    let mut outline_done = false;
    if self.is_variable_font {
        if let LocaGlyfState::Deferred { bytes, .. } = &self.loca_glyf {
            let scope = allsorts::binary::read::ReadScope::new(bytes);
            if let Ok(font_data) =
                scope.read::<allsorts::font_data::FontData<'_>>()
            {
                if let Ok(provider) = font_data.table_provider(self.original_index) {
                    if let Ok(store) = VariableGlyfContextStore::read(&provider) {
                        if let Ok(var_ctx) = VariableGlyfContext::new(&store) {
                            let mut visitor = GlyfVisitorContext::new(
                                &mut *loca_glyf,
                                Some(var_ctx),
                            );
                            let mut collector = GlyphOutlineCollector::new();
                            if visitor.visit(gid, None, &mut collector).is_ok() {
                                record.outline = collector.into_outlines();
                                let (min_x, min_y, max_x, max_y) =
                                    compute_outline_bbox(&record.outline);
                                record.bounding_box = OwnedGlyphBoundingBox {
                                    min_x,
                                    min_y,
                                    max_x,
                                    max_y,
                                };
                                outline_done = true;
                            }
                        }
                    }
                }
            }
        }
    }
    // Plain visit: used for non-variable faces and as the fallback
    // whenever any step of the variable-font attempt above failed.
    if !outline_done {
        let mut visitor =
            GlyfVisitorContext::new(&mut *loca_glyf, None);
        let mut collector = GlyphOutlineCollector::new();
        if visitor.visit(gid, None, &mut collector).is_ok() {
            record.outline = collector.into_outlines();
            let (min_x, min_y, max_x, max_y) =
                compute_outline_bbox(&record.outline);
            record.bounding_box = OwnedGlyphBoundingBox {
                min_x,
                min_y,
                max_x,
                max_y,
            };
        }
    }

    // Second pass: pull raw SimpleGlyph data for TrueType
    // bytecode hinting. Only simple glyphs populate the raw
    // fields; for any other glyph kind they stay `None`.
    // NOTE(review): the original comment says LocaGlyf caches the
    // `Arc<Glyph>` internally, making this lookup cheap after the
    // first call — not verifiable from this file.
    if let Ok(glyph_arc) = loca_glyf.glyph(gid) {
        if let allsorts::tables::glyf::Glyph::Simple(sg) = glyph_arc.as_ref() {
            record.raw_points = Some(
                sg.coordinates.iter().map(|(_, pt)| (pt.0, pt.1)).collect(),
            );
            record.raw_on_curve = Some(
                sg.coordinates.iter().map(|(f, _)| f.is_on_curve()).collect(),
            );
            record.raw_contour_ends = Some(sg.end_pts_of_contours.clone());
            record.instructions = Some(sg.instructions.to_vec());
        }
    }

    record
}
1506
1507 /// Parse PDF-specific font metrics from HEAD, HHEA, and OS/2 tables
1508 fn parse_pdf_font_metrics(
1509 font_bytes: &[u8],
1510 font_index: usize,
1511 head_table: &allsorts::tables::HeadTable,
1512 hhea_table: &allsorts::tables::HheaTable,
1513 ) -> PdfFontMetrics {
1514 use allsorts::{
1515 binary::read::ReadScope,
1516 font_data::FontData,
1517 tables::{os2::Os2, FontTableProvider},
1518 tag,
1519 };
1520
1521 let scope = ReadScope::new(font_bytes);
1522 let font_file = scope.read::<FontData<'_>>().ok();
1523 let provider = font_file
1524 .as_ref()
1525 .and_then(|ff| ff.table_provider(font_index).ok());
1526
1527 let os2_table = provider
1528 .as_ref()
1529 .and_then(|p| p.table_data(tag::OS_2).ok())
1530 .and_then(|os2_data| {
1531 let data = os2_data?;
1532 let scope = ReadScope::new(&data);
1533 scope.read_dep::<Os2>(data.len()).ok()
1534 });
1535
1536 // Base metrics from HEAD and HHEA (always present)
1537 let base = PdfFontMetrics {
1538 units_per_em: head_table.units_per_em,
1539 font_flags: head_table.flags,
1540 x_min: head_table.x_min,
1541 y_min: head_table.y_min,
1542 x_max: head_table.x_max,
1543 y_max: head_table.y_max,
1544 ascender: hhea_table.ascender,
1545 descender: hhea_table.descender,
1546 line_gap: hhea_table.line_gap,
1547 advance_width_max: hhea_table.advance_width_max,
1548 caret_slope_rise: hhea_table.caret_slope_rise,
1549 caret_slope_run: hhea_table.caret_slope_run,
1550 ..PdfFontMetrics::zero()
1551 };
1552
1553 // Add OS/2 metrics if available
1554 os2_table
1555 .map(|os2| PdfFontMetrics {
1556 x_avg_char_width: os2.x_avg_char_width,
1557 us_weight_class: os2.us_weight_class,
1558 us_width_class: os2.us_width_class,
1559 y_strikeout_size: os2.y_strikeout_size,
1560 y_strikeout_position: os2.y_strikeout_position,
1561 ..base
1562 })
1563 .unwrap_or(base)
1564 }
1565
1566 /// Returns the width of the space character in font units.
1567 ///
1568 /// This is used internally for text layout calculations.
1569 /// Returns `None` if the font has no space glyph or its width cannot be determined.
1570 fn get_space_width_internal(&self) -> Option<usize> {
1571 if let Some(mock) = self.mock.as_ref() {
1572 return mock.space_width;
1573 }
1574 let glyph_index = self.lookup_glyph_index(' ' as u32)?;
1575
1576 allsorts::glyph_info::advance(
1577 &self.maxp_table,
1578 &self.hhea_table,
1579 self.hmtx_bytes(),
1580 glyph_index,
1581 )
1582 .ok()
1583 .map(|s| s as usize)
1584 }
1585
1586 /// Look up the glyph index for a Unicode codepoint
1587 pub fn lookup_glyph_index(&self, codepoint: u32) -> Option<u16> {
1588 let cmap = self.cmap_subtable.as_ref()?;
1589 cmap.map_glyph(codepoint).ok().flatten()
1590 }
1591
1592 /// Get the horizontal advance width for a glyph in font units.
1593 ///
1594 /// Pulled straight from the `hmtx` table — no glyph-outline
1595 /// decode. Called once per shaped glyph per layout pass, so
1596 /// avoiding the lazy decode here is a meaningful win over
1597 /// routing through `get_or_decode_glyph`.
1598 pub fn get_horizontal_advance(&self, glyph_index: u16) -> u16 {
1599 if let Some(mock) = self.mock.as_ref() {
1600 return mock.glyph_advances.get(&glyph_index).copied().unwrap_or(0);
1601 }
1602 allsorts::glyph_info::advance(
1603 &self.maxp_table,
1604 &self.hhea_table,
1605 self.hmtx_bytes(),
1606 glyph_index,
1607 )
1608 .unwrap_or_default()
1609 }
1610
1611 /// Get the hinted advance width in pixels for a glyph at the given ppem.
1612 ///
1613 /// For glyphs with outlines, runs TrueType bytecode hinting to get the
1614 /// grid-fitted advance from phantom points. For glyphs without outlines
1615 /// (e.g. space), rounds the scaled advance to the pixel grid, matching
1616 /// FreeType's behavior.
1617 ///
1618 /// Returns `None` if hinting is not available or fails.
1619 pub fn get_hinted_advance_px(&self, glyph_index: u16, ppem: u16) -> Option<f32> {
1620 let glyph = self.get_or_decode_glyph(glyph_index)?;
1621
1622 let upem = self.font_metrics.units_per_em;
1623 if upem == 0 || ppem == 0 {
1624 return None;
1625 }
1626
1627 // Check if we even have a hint instance
1628 let _hint_mutex = self.hint_instance.as_ref()?;
1629
1630 use allsorts::hinting::f26dot6::{compute_scale, F26Dot6};
1631 let scale = compute_scale(ppem, upem);
1632 let adv_f26dot6 = F26Dot6::from_funits(glyph.horz_advance as i32, scale);
1633
1634 // For glyphs with outline data, run bytecode hinting
1635 if let (Some(raw_points), Some(raw_on_curve), Some(raw_contour_ends)) = (
1636 glyph.raw_points.as_ref(),
1637 glyph.raw_on_curve.as_ref(),
1638 glyph.raw_contour_ends.as_ref(),
1639 ) {
1640 let instructions = glyph.instructions.as_deref().unwrap_or(&[]);
1641 let mut hint = _hint_mutex.lock().ok()?;
1642 hint.set_ppem(ppem, ppem as f64).ok()?;
1643
1644 let points_f26dot6: Vec<(i32, i32)> = raw_points
1645 .iter()
1646 .map(|&(x, y)| {
1647 let sx = F26Dot6::from_funits(x as i32, scale);
1648 let sy = F26Dot6::from_funits(y as i32, scale);
1649 (sx.to_bits(), sy.to_bits())
1650 })
1651 .collect();
1652
1653 // Use the scaled advance rounded to pixel grid, NOT the hinted
1654 // phantom point. Some glyph programs apply ClearType-specific
1655 // SHPIX adjustments to the advance phantom point that are wrong
1656 // for non-ClearType rendering. The rounded scaled advance matches
1657 // FreeType's DEFAULT mode advance output.
1658 let rounded = (adv_f26dot6.to_bits() + 32) & !63;
1659 Some(rounded as f32 / 64.0)
1660 } else {
1661 // No outline (e.g. space): use scaled advance, rounded to grid
1662 // (matching FreeType's phantom point pre-rounding)
1663 let rounded = (adv_f26dot6.to_bits() + 32) & !63;
1664 Some(rounded as f32 / 64.0)
1665 }
1666 }
1667
1668 /// Get the number of glyphs in this font
1669 pub fn num_glyphs(&self) -> u16 {
1670 self.num_glyphs
1671 }
1672
1673 /// Check if this font has a glyph for the given codepoint
1674 pub fn has_glyph(&self, codepoint: u32) -> bool {
1675 self.lookup_glyph_index(codepoint).is_some()
1676 }
1677
1678 /// Get vertical metrics for a glyph (for vertical text layout).
1679 ///
1680 /// Uses vhea+vmtx tables (same binary format as hhea+hmtx).
1681 /// Returns None if font has no vertical metrics tables.
1682 pub fn get_vertical_metrics(
1683 &self,
1684 glyph_id: u16,
1685 ) -> Option<crate::text3::cache::VerticalMetrics> {
1686 let vhea = self.vhea_table.as_ref()?;
1687 if self.vmtx_range.1 == 0 {
1688 return None;
1689 }
1690 let vert_advance = allsorts::glyph_info::advance(
1691 &self.maxp_table, vhea, self.vmtx_bytes(), glyph_id,
1692 ).ok()? as f32;
1693
1694 let units_per_em = self.font_metrics.units_per_em as f32;
1695 let scale = if units_per_em > 0.0 { 1.0 / units_per_em } else { 0.001 };
1696
1697 // Vertical bearing: approximate from glyph bbox if available
1698 let (bearing_x, bearing_y) = self.get_or_decode_glyph(glyph_id)
1699 .map(|g| {
1700 let bbox = &g.bounding_box;
1701 // tsb (top side bearing): origin_y - max_y
1702 // lsb for vertical: center the glyph horizontally
1703 let width = (bbox.max_x - bbox.min_x) as f32;
1704 (-(width / 2.0) * scale, (vert_advance * scale) - (bbox.max_y as f32 * scale))
1705 })
1706 .unwrap_or((0.0, 0.0));
1707
1708 Some(crate::text3::cache::VerticalMetrics {
1709 advance: vert_advance * scale,
1710 bearing_x,
1711 bearing_y,
1712 origin_y: self.font_metrics.ascent * scale,
1713 })
1714 }
1715
1716 /// Get layout-specific font metrics
1717 pub fn get_font_metrics(&self) -> crate::text3::cache::LayoutFontMetrics {
1718 // Ensure descent is positive (OpenType may have negative descent)
1719 let descent = if self.font_metrics.descent > 0.0 {
1720 self.font_metrics.descent
1721 } else {
1722 -self.font_metrics.descent
1723 };
1724
1725 crate::text3::cache::LayoutFontMetrics {
1726 ascent: self.font_metrics.ascent,
1727 descent,
1728 line_gap: self.font_metrics.line_gap,
1729 units_per_em: self.font_metrics.units_per_em,
1730 x_height: self.font_metrics.x_height,
1731 cap_height: self.font_metrics.cap_height,
1732 }
1733 }
1734
1735 /// Convert the ParsedFont back to bytes using allsorts::whole_font
1736 /// This reconstructs the entire font from the parsed data
1737 ///
1738 /// Source bytes come from either the explicit
1739 /// [`ParsedFont::with_source_bytes`] handle (PDF-first
1740 /// construction) *or* the `LocaGlyfState::Deferred` slot
1741 /// installed by [`ParsedFont::from_bytes_shared`]. The
1742 /// production lazy path retains bytes for the lazy LocaGlyf
1743 /// loader, so PDF subsetting Just Works without an extra
1744 /// `with_source_bytes` call.
1745 ///
1746 /// # Arguments
1747 /// * `tags` - Optional list of specific table tags to include (None = all tables)
1748 pub fn to_bytes(&self, tags: Option<&[u32]>) -> Result<Vec<u8>, String> {
1749 let source = self.source_bytes_for_subset().ok_or_else(|| {
1750 "ParsedFont::to_bytes requires source bytes; construct via \
1751 ParsedFont::from_bytes_shared (production lazy path) or \
1752 attach via ParsedFont::with_source_bytes"
1753 .to_string()
1754 })?;
1755 let scope = ReadScope::new(source.as_slice());
1756 let font_file = scope.read::<FontData<'_>>().map_err(|e| e.to_string())?;
1757 let provider = font_file
1758 .table_provider(self.original_index)
1759 .map_err(|e| e.to_string())?;
1760
1761 let tags_to_use = tags.unwrap_or(&[
1762 tag::CMAP,
1763 tag::HEAD,
1764 tag::HHEA,
1765 tag::HMTX,
1766 tag::MAXP,
1767 tag::NAME,
1768 tag::OS_2,
1769 tag::POST,
1770 tag::GLYF,
1771 tag::LOCA,
1772 ]);
1773
1774 whole_font(&provider, tags_to_use).map_err(|e| e.to_string())
1775 }
1776
1777 /// Create a subset font containing only the specified glyph IDs
1778 /// Returns the subset font bytes and a mapping from old to new glyph IDs
1779 ///
1780 /// # Arguments
1781 /// * `glyph_ids` - The glyph IDs to include in the subset (glyph 0/.notdef is always
1782 /// included)
1783 /// * `cmap_target` - Target cmap format (Unicode for web, MacRoman for compatibility)
1784 ///
1785 /// # Returns
1786 /// A tuple of (subset_font_bytes, glyph_mapping) where glyph_mapping maps
1787 /// original_glyph_id -> (new_glyph_id, original_char)
1788 pub fn subset(
1789 &self,
1790 glyph_ids: &[(u16, char)],
1791 cmap_target: CmapTarget,
1792 ) -> Result<(Vec<u8>, BTreeMap<u16, (u16, char)>), String> {
1793 let source = self.source_bytes_for_subset().ok_or_else(|| {
1794 "ParsedFont::subset requires source bytes; construct via \
1795 ParsedFont::from_bytes_shared (production lazy path) or \
1796 attach via ParsedFont::with_source_bytes"
1797 .to_string()
1798 })?;
1799 let scope = ReadScope::new(source.as_slice());
1800 let font_file = scope.read::<FontData<'_>>().map_err(|e| e.to_string())?;
1801 let provider = font_file
1802 .table_provider(self.original_index)
1803 .map_err(|e| e.to_string())?;
1804
1805 // Build glyph mapping: original_id -> (new_id, char)
1806 let glyph_mapping: BTreeMap<u16, (u16, char)> = glyph_ids
1807 .iter()
1808 .enumerate()
1809 .map(|(new_id, &(original_id, ch))| (original_id, (new_id as u16, ch)))
1810 .collect();
1811
1812 // Extract just the glyph IDs for subsetting
1813 let ids: Vec<u16> = glyph_ids.iter().map(|(id, _)| *id).collect();
1814
1815 // Use PDF profile for embedding fonts in PDFs
1816 let font_bytes = allsorts_subset(&provider, &ids, &SubsetProfile::Pdf, cmap_target)
1817 .map_err(|e| format!("Subset error: {:?}", e))?;
1818
1819 Ok((font_bytes, glyph_mapping))
1820 }
1821
1822 /// Get the width of a glyph in font units (internal, unscaled)
1823 pub fn get_glyph_width_internal(&self, glyph_index: u16) -> Option<usize> {
1824 allsorts::glyph_info::advance(
1825 &self.maxp_table,
1826 &self.hhea_table,
1827 self.hmtx_bytes(),
1828 glyph_index,
1829 )
1830 .ok()
1831 .map(|s| s as usize)
1832 }
1833
1834 /// Get the width of the space character (unscaled font units)
1835 #[inline]
1836 pub const fn get_space_width(&self) -> Option<usize> {
1837 self.space_width
1838 }
1839
1840 /// Add glyph-to-text mapping to reverse cache
1841 /// This should be called during text shaping when we know both the source text and
1842 /// resulting glyphs
1843 pub fn cache_glyph_mapping(&mut self, glyph_id: u16, cluster_text: &str) {
1844 self.reverse_glyph_cache
1845 .insert(glyph_id, cluster_text.to_string());
1846 }
1847
1848 /// Get the cluster text that produced a specific glyph ID
1849 /// Returns the original text that was shaped into this glyph (handles ligatures correctly)
1850 pub fn get_glyph_cluster_text(&self, glyph_id: u16) -> Option<&str> {
1851 self.reverse_glyph_cache.get(&glyph_id).map(|s| s.as_str())
1852 }
1853
1854 /// Get the first character from the cluster text for a glyph ID
1855 /// This is useful for PDF ToUnicode CMap generation which requires single character
1856 /// mappings
1857 pub fn get_glyph_primary_char(&self, glyph_id: u16) -> Option<char> {
1858 self.reverse_glyph_cache
1859 .get(&glyph_id)
1860 .and_then(|text| text.chars().next())
1861 }
1862
1863 /// Clear the reverse glyph cache (useful for memory management)
1864 pub fn clear_glyph_cache(&mut self) {
1865 self.reverse_glyph_cache.clear();
1866 }
1867
1868 /// Get the bounding box size of a glyph (unscaled units) - for PDF
1869 /// Returns (width, height) in font units
1870 pub fn get_glyph_bbox_size(&self, glyph_index: u16) -> Option<(i32, i32)> {
1871 let g = self.get_or_decode_glyph(glyph_index)?;
1872 let glyph_width = g.horz_advance as i32;
1873 let glyph_height = g.bounding_box.max_y as i32 - g.bounding_box.min_y as i32;
1874 Some((glyph_width, glyph_height))
1875 }
1876 }
1877
1878 /// Compute the bounding box from collected glyph outlines.
1879 fn compute_outline_bbox(outlines: &[GlyphOutline]) -> (i16, i16, i16, i16) {
1880 let mut min_x = i16::MAX;
1881 let mut min_y = i16::MAX;
1882 let mut max_x = i16::MIN;
1883 let mut max_y = i16::MIN;
1884 let mut has_points = false;
1885
1886 for outline in outlines {
1887 for op in outline.operations.as_slice() {
1888 let points: &[(i16, i16)] = match op {
1889 GlyphOutlineOperation::MoveTo(m) => &[(m.x, m.y)],
1890 GlyphOutlineOperation::LineTo(l) => &[(l.x, l.y)],
1891 GlyphOutlineOperation::QuadraticCurveTo(q) => {
1892 // Check both control and end point for bbox
1893 min_x = min_x.min(q.ctrl_1_x).min(q.end_x);
1894 min_y = min_y.min(q.ctrl_1_y).min(q.end_y);
1895 max_x = max_x.max(q.ctrl_1_x).max(q.end_x);
1896 max_y = max_y.max(q.ctrl_1_y).max(q.end_y);
1897 has_points = true;
1898 continue;
1899 }
1900 GlyphOutlineOperation::CubicCurveTo(c) => {
1901 min_x = min_x.min(c.ctrl_1_x).min(c.ctrl_2_x).min(c.end_x);
1902 min_y = min_y.min(c.ctrl_1_y).min(c.ctrl_2_y).min(c.end_y);
1903 max_x = max_x.max(c.ctrl_1_x).max(c.ctrl_2_x).max(c.end_x);
1904 max_y = max_y.max(c.ctrl_1_y).max(c.ctrl_2_y).max(c.end_y);
1905 has_points = true;
1906 continue;
1907 }
1908 GlyphOutlineOperation::ClosePath => continue,
1909 };
1910 for &(x, y) in points {
1911 min_x = min_x.min(x);
1912 min_y = min_y.min(y);
1913 max_x = max_x.max(x);
1914 max_y = max_y.max(y);
1915 has_points = true;
1916 }
1917 }
1918 }
1919
1920 if has_points {
1921 (min_x, min_y, max_x, max_y)
1922 } else {
1923 (0, 0, 0, 0)
1924 }
1925 }
1926
1927 #[derive(Debug, Clone)]
1928 pub struct OwnedGlyph {
1929 pub bounding_box: OwnedGlyphBoundingBox,
1930 pub horz_advance: u16,
1931 pub outline: Vec<GlyphOutline>,
1932 pub phantom_points: Option<[Point; 4]>,
1933 /// Raw TrueType points in font units (for hinting). None for composite/CFF glyphs.
1934 pub raw_points: Option<Vec<(i16, i16)>>,
1935 /// On-curve flags for each raw point.
1936 pub raw_on_curve: Option<Vec<bool>>,
1937 /// Contour end-point indices (TrueType).
1938 pub raw_contour_ends: Option<Vec<u16>>,
1939 /// Per-glyph TrueType hinting instructions.
1940 pub instructions: Option<Vec<u8>>,
1941 }
1942
1943 // --- ParsedFontTrait Implementation for ParsedFont ---
1944
1945 impl crate::text3::cache::ShallowClone for ParsedFont {
1946 fn shallow_clone(&self) -> Self {
1947 self.clone() // ParsedFont::clone uses Arc internally, so it's shallow
1948 }
1949 }
1950
1951 impl crate::text3::cache::ParsedFontTrait for ParsedFont {
1952 fn shape_text(
1953 &self,
1954 text: &str,
1955 script: crate::font_traits::Script,
1956 language: crate::font_traits::Language,
1957 direction: crate::font_traits::BidiDirection,
1958 style: &crate::font_traits::StyleProperties,
1959 ) -> Result<Vec<crate::font_traits::Glyph>, crate::font_traits::LayoutError> {
1960 // Call the existing shape_text_for_parsed_font method (defined in default.rs)
1961 crate::text3::default::shape_text_for_parsed_font(
1962 self, text, script, language, direction, style,
1963 )
1964 }
1965
1966 fn get_hash(&self) -> u64 {
1967 self.hash
1968 }
1969
1970 fn get_glyph_size(
1971 &self,
1972 glyph_id: u16,
1973 font_size_px: f32,
1974 ) -> Option<azul_core::geom::LogicalSize> {
1975 self.get_or_decode_glyph(glyph_id).map(|record| {
1976 let units_per_em = self.font_metrics.units_per_em as f32;
1977 let scale_factor = if units_per_em > 0.0 {
1978 font_size_px / units_per_em
1979 } else {
1980 0.01
1981 };
1982 let bbox = &record.bounding_box;
1983 azul_core::geom::LogicalSize {
1984 width: (bbox.max_x - bbox.min_x) as f32 * scale_factor,
1985 height: (bbox.max_y - bbox.min_y) as f32 * scale_factor,
1986 }
1987 })
1988 }
1989
1990 fn get_hyphen_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
1991 let glyph_id = self.lookup_glyph_index('-' as u32)?;
1992 let advance_units = self.get_horizontal_advance(glyph_id);
1993 let scale_factor = if self.font_metrics.units_per_em > 0 {
1994 font_size / (self.font_metrics.units_per_em as f32)
1995 } else {
1996 return None;
1997 };
1998 let scaled_advance = advance_units as f32 * scale_factor;
1999 Some((glyph_id, scaled_advance))
2000 }
2001
2002 fn get_kashida_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
2003 let glyph_id = self.lookup_glyph_index('\u{0640}' as u32)?;
2004 let advance_units = self.get_horizontal_advance(glyph_id);
2005 let scale_factor = if self.font_metrics.units_per_em > 0 {
2006 font_size / (self.font_metrics.units_per_em as f32)
2007 } else {
2008 return None;
2009 };
2010 let scaled_advance = advance_units as f32 * scale_factor;
2011 Some((glyph_id, scaled_advance))
2012 }
2013
2014 fn has_glyph(&self, codepoint: u32) -> bool {
2015 self.lookup_glyph_index(codepoint).is_some()
2016 }
2017
2018 fn get_vertical_metrics(
2019 &self,
2020 glyph_id: u16,
2021 ) -> Option<crate::text3::cache::VerticalMetrics> {
2022 self.get_vertical_metrics(glyph_id)
2023 }
2024
2025 fn get_font_metrics(&self) -> crate::text3::cache::LayoutFontMetrics {
2026 self.font_metrics.clone()
2027 }
2028
2029 fn num_glyphs(&self) -> u16 {
2030 self.num_glyphs
2031 }
2032
2033 fn get_space_width(&self) -> Option<usize> {
2034 self.space_width
2035 }
2036 }
2037
/// Converts the outline of an [`OwnedGlyph`] into an agg-rust `PathStorage`.
///
/// Coordinates stay in font units; only the Y axis is negated (font outlines
/// are Y-up, the produced path is Y-down). Scaling and translation are left to
/// the caller.
///
/// Returns `None` if the glyph has no outline operations at all (e.g. space).
#[cfg(feature = "cpurender")]
pub fn build_glyph_path(glyph: &OwnedGlyph) -> Option<agg_rust::path_storage::PathStorage> {
    use agg_rust::{basics::PATH_FLAGS_NONE, path_storage::PathStorage};

    // Nothing to draw when every outline is empty (or there are no outlines).
    let is_blank = glyph
        .outline
        .iter()
        .all(|contour| contour.operations.as_slice().is_empty());
    if is_blank {
        return None;
    }

    let mut path = PathStorage::new();
    for contour in &glyph.outline {
        for op in contour.operations.as_slice() {
            match op {
                GlyphOutlineOperation::MoveTo(m) => {
                    path.move_to(m.x as f64, -(m.y as f64));
                }
                GlyphOutlineOperation::LineTo(l) => {
                    path.line_to(l.x as f64, -(l.y as f64));
                }
                GlyphOutlineOperation::QuadraticCurveTo(q) => {
                    path.curve3(
                        q.ctrl_1_x as f64,
                        -(q.ctrl_1_y as f64),
                        q.end_x as f64,
                        -(q.end_y as f64),
                    );
                }
                GlyphOutlineOperation::CubicCurveTo(c) => {
                    path.curve4(
                        c.ctrl_1_x as f64,
                        -(c.ctrl_1_y as f64),
                        c.ctrl_2_x as f64,
                        -(c.ctrl_2_y as f64),
                        c.end_x as f64,
                        -(c.end_y as f64),
                    );
                }
                GlyphOutlineOperation::ClosePath => {
                    path.close_polygon(PATH_FLAGS_NONE);
                }
            }
        }
    }

    Some(path)
}
2086}