Skip to main content

azul_layout/text3/
default.rs

1//! Default / concrete implementations of the text3 trait abstractions.
2//!
3//! This module bridges the generic text3 layout engine and the concrete
4//! `FontRef` / `ParsedFont` types.  It provides:
5//!
6//! - `ParsedFontTrait` implementation for `FontRef`
7//! - Font loading via `PathLoader`
8//! - The core `shape_text_internal` shaping function
9
10use std::{path::Path, sync::Arc};
11
12use allsorts::{
13    gpos,
14    gsub::{self, FeatureInfo, FeatureMask, Features},
15};
16use azul_core::geom::LogicalSize;
17use azul_css::props::basic::FontRef;
18use rust_fontconfig::FcFontCache;
19
20use crate::{
21    font::parsed::ParsedFont,
22    text3::{
23        cache::{
24            BidiDirection, BidiLevel, FontManager, FontSelector, FontVariantCaps,
25            FontVariantLigatures, FontVariantNumeric, Glyph, GlyphOrientation, GlyphSource,
26            LayoutError, LayoutFontMetrics, ParsedFontTrait, Point, ShallowClone, StyleProperties,
27            TextCombineUpright, TextDecoration, TextOrientation, VerticalMetrics, WritingMode,
28        },
29        script::Script,
30    },
31};
32
33/// Creates a FontRef from font bytes by parsing them into a ParsedFont.
34///
35/// This is a bridge function that:
36///
37/// 1. Parses the bytes into a ParsedFont
38/// 2. Wraps it in a FontRef with proper reference counting
39///
40/// # Arguments
41///
42/// - `font_bytes` - The raw font file data
43/// - `font_index` - Index of the font in a font collection (0 for single fonts)
44/// - `parse_outlines` - Whether to parse glyph outlines (expensive, usually false for layout)
45pub fn font_ref_from_bytes(
46    font_bytes: &[u8],
47    font_index: usize,
48    parse_outlines: bool,
49) -> Option<FontRef> {
50    // Parse the font bytes into ParsedFont
51    let mut warnings = Vec::new();
52    let parsed_font = ParsedFont::from_bytes(font_bytes, font_index, &mut warnings)?;
53
54    Some(crate::parsed_font_to_font_ref(parsed_font))
55}
56
/// A stateless font loader that produces `FontRef`s.
///
/// The loader carries no data of its own. Its methods either read a font
/// file from a path and parse it (`load_from_path`), or parse font bytes
/// that the caller already holds behind an `Arc` (`load_font_shared`).
#[derive(Debug, Default, Clone)]
pub struct PathLoader;
63
64impl PathLoader {
65    /// Creates a new `PathLoader`.
66    pub fn new() -> Self {
67        PathLoader
68    }
69
70    /// Read a font from disk and parse via the lazy-LocaGlyf path.
71    /// Convenience wrapper for callers that have a path but no
72    /// `Arc<FontBytes>` yet — uses a heap read (`Owned`) since a
73    /// loose path won't go through the fontconfig dedup cache.
74    pub fn load_from_path(&self, path: &Path, font_index: usize) -> Result<FontRef, LayoutError> {
75        let font_bytes = std::fs::read(path).map_err(|_| {
76            LayoutError::FontNotFound(FontSelector {
77                family: path.to_string_lossy().into_owned(),
78                weight: rust_fontconfig::FcWeight::Normal,
79                style: crate::text3::cache::FontStyle::Normal,
80                unicode_ranges: Vec::new(),
81            })
82        })?;
83        let arc_owned = std::sync::Arc::<[u8]>::from(font_bytes);
84        let bytes = std::sync::Arc::new(rust_fontconfig::FontBytes::Owned(arc_owned));
85        self.load_font_shared(bytes, font_index)
86    }
87
88    /// Lazy-friendly loader: takes an `Arc<FontBytes>` (typically
89    /// from [`rust_fontconfig::FcFontCache::get_font_bytes`]) and
90    /// uses the [`ParsedFont::from_bytes_shared`] constructor so
91    /// `LocaGlyf::load` is deferred until the first glyph decode.
92    ///
93    /// This is the only loader on the production path —
94    /// `load_fonts_from_disk` calls this via the closure passed
95    /// into `FontManager::load_missing_for_chains`. Fonts that
96    /// never get rasterized (common — every face of a `.ttc` gets a
97    /// FontId, but pages only hit a couple of them) skip their
98    /// per-face loca+glyf materialisation entirely; with
99    /// `FontBytes::Mmapped` the unread pages also never count
100    /// toward RSS.
101    pub fn load_font_shared(
102        &self,
103        font_bytes: std::sync::Arc<rust_fontconfig::FontBytes>,
104        font_index: usize,
105    ) -> Result<FontRef, LayoutError> {
106        let mut warnings = Vec::new();
107        let parsed_font = ParsedFont::from_bytes_shared(font_bytes, font_index, &mut warnings)
108            .ok_or_else(|| {
109                LayoutError::ShapingError("Failed to parse font with allsorts".to_string())
110            })?;
111        Ok(crate::parsed_font_to_font_ref(parsed_font))
112    }
113}
114
115impl FontManager<FontRef> {
116    pub fn new_with_fc_cache(fc_cache: FcFontCache) -> Result<Self, LayoutError> {
117        FontManager::new(fc_cache)
118    }
119
120    /// Evict the cached `LocaGlyf` for every face that hasn't had a
121    /// `get_or_decode_glyph` call within the last `idle` duration.
122    /// Only `LocaGlyfState::Deferred` faces (the production lazy
123    /// path) can be evicted — they keep their source `Arc<[u8]>` so
124    /// the next glyph access re-parses cheaply. `LocaGlyfState::Loaded`
125    /// faces from the eager path stay put.
126    ///
127    /// Returns the number of faces evicted. Embedders can call this
128    /// from a memory-pressure hook or on a timer; servo-shot
129    /// exposes it via `--azul-evict-after-each` for measurement.
130    pub fn evict_unused(&self, idle: std::time::Duration) -> usize {
131        use crate::font::parsed::ParsedFont;
132        let parsed = match self.parsed_fonts.lock() {
133            Ok(p) => p,
134            Err(_) => return 0,
135        };
136        // We compare against the same monotonic clock the font's
137        // `last_used` is sampled from. `last_used == 0` means
138        // "never touched" -> eligible. Otherwise we only evict if
139        // `now_nanos - last_used >= idle.as_nanos()`.
140        let cutoff = idle.as_nanos() as u64;
141        let now_nanos = crate::font::parsed::monotonic_now_nanos();
142        let mut evicted = 0usize;
143        for font_ref in parsed.values() {
144            let font: &ParsedFont = get_parsed_font(font_ref);
145            let last = font.last_used_nanos();
146            // Untouched faces are eligible immediately. Touched
147            // faces need to be `idle` past their last use.
148            let stale = last == 0 || now_nanos.saturating_sub(last) >= cutoff;
149            if stale && font.evict_loca_glyf() {
150                evicted += 1;
151            }
152        }
153        evicted
154    }
155}
156
157
158// ParsedFontTrait Implementation for FontRef
159
160// Implement ShallowClone for FontRef
161impl crate::text3::cache::ShallowClone for FontRef {
162    fn shallow_clone(&self) -> Self {
163        // FontRef::clone increments the reference count
164        self.clone()
165    }
166}
167
/// Borrows the `ParsedFont` stored inside a `FontRef`.
///
/// The pointer returned by `FontRef::get_parsed` is cast to
/// `*const ParsedFont` and dereferenced; the resulting reference is tied
/// to the lifetime of the `font_ref` borrow.
#[inline]
fn get_parsed_font(font_ref: &FontRef) -> &ParsedFont {
    // SAFETY: NOTE(review) — this assumes every `FontRef` reaching this
    // module was built from a `ParsedFont` (e.g. via
    // `crate::parsed_font_to_font_ref`) and that the pointee stays alive
    // and unmoved for as long as `font_ref` is borrowed. Neither invariant
    // is visible from this file; confirm against `FontRef`'s definition.
    unsafe { &*(font_ref.get_parsed() as *const ParsedFont) }
}
173
174impl ParsedFontTrait for FontRef {
175    // +spec:block-formatting-context:21ec9a - bidi direction handled during text shaping for vertical writing modes
176    fn shape_text(
177        &self,
178        text: &str,
179        script: Script,
180        language: crate::text3::script::Language,
181        direction: BidiDirection,
182        style: &StyleProperties,
183    ) -> Result<Vec<Glyph>, LayoutError> {
184        // Delegate to the inner ParsedFont's shape_text, passing self as font_ref
185        let parsed = get_parsed_font(self);
186        parsed.shape_text_for_font_ref(self, text, script, language, direction, style)
187    }
188
189    fn get_hash(&self) -> u64 {
190        get_parsed_font(self).hash
191    }
192
193    fn get_glyph_size(&self, glyph_id: u16, font_size: f32) -> Option<LogicalSize> {
194        get_parsed_font(self).get_glyph_size(glyph_id, font_size)
195    }
196
197    fn get_hyphen_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
198        get_parsed_font(self).get_hyphen_glyph_and_advance(font_size)
199    }
200
201    fn get_kashida_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
202        get_parsed_font(self).get_kashida_glyph_and_advance(font_size)
203    }
204
205    fn has_glyph(&self, codepoint: u32) -> bool {
206        get_parsed_font(self).has_glyph(codepoint)
207    }
208
209    fn get_vertical_metrics(&self, glyph_id: u16) -> Option<VerticalMetrics> {
210        get_parsed_font(self).get_vertical_metrics(glyph_id)
211    }
212
213    fn get_font_metrics(&self) -> LayoutFontMetrics {
214        get_parsed_font(self).font_metrics.clone()
215    }
216
217    fn num_glyphs(&self) -> u16 {
218        get_parsed_font(self).num_glyphs
219    }
220
221    fn get_space_width(&self) -> Option<usize> {
222        get_parsed_font(self).get_space_width()
223    }
224}
225
/// Extension trait giving `FontRef` access to its source bytes and to
/// PDF-style font metrics.
///
/// Both methods need the `ParsedFont` data stored inside the `FontRef`,
/// which is why they live in this crate rather than on `FontRef` itself.
pub trait FontRefExt {
    /// Get the original font bytes. Returns an empty slice when the
    /// underlying `ParsedFont` was created without retaining its
    /// source bytes (the default since the lazy-font-loading refactor).
    /// Callers that need the bytes for PDF embedding must construct
    /// the `ParsedFont` via `ParsedFont::with_source_bytes`.
    fn get_bytes(&self) -> &[u8];
    /// Get the full font metrics (PDF-style metrics from HEAD, HHEA, OS/2 tables).
    ///
    /// In the `FontRef` implementation in this file only the fields
    /// present in the font's `pdf_font_metrics` carry real values; every
    /// other field is filled with `0` or `None`.
    fn get_full_font_metrics(&self) -> azul_css::props::basic::FontMetrics;
}
239
240impl FontRefExt for FontRef {
241    fn get_bytes(&self) -> &[u8] {
242        get_parsed_font(self)
243            .original_bytes
244            .as_ref()
245            .map(|b| b.as_slice())
246            .unwrap_or(&[])
247    }
248
249    fn get_full_font_metrics(&self) -> azul_css::props::basic::FontMetrics {
250        use azul_css::{OptionI16, OptionU16, OptionU32};
251
252        let parsed = get_parsed_font(self);
253        let pdf = &parsed.pdf_font_metrics;
254
255        // PdfFontMetrics only has a subset of fields; fill others with defaults
256        azul_css::props::basic::FontMetrics {
257            // OS/2 version 1 fields (u32 - align 4, placed first)
258            ul_code_page_range1: OptionU32::None,
259            ul_code_page_range2: OptionU32::None,
260
261            // OS/2 table (u32 fields)
262            ul_unicode_range1: 0,   // Not in PdfFontMetrics
263            ul_unicode_range2: 0,   // Not in PdfFontMetrics
264            ul_unicode_range3: 0,   // Not in PdfFontMetrics
265            ul_unicode_range4: 0,   // Not in PdfFontMetrics
266            ach_vend_id: 0,         // Not in PdfFontMetrics
267
268            // OS/2 version 0 fields (optional)
269            s_typo_ascender: OptionI16::None,
270            s_typo_descender: OptionI16::None,
271            s_typo_line_gap: OptionI16::None,
272            us_win_ascent: OptionU16::None,
273            us_win_descent: OptionU16::None,
274
275            // OS/2 version 2 fields (optional)
276            sx_height: OptionI16::None,
277            s_cap_height: OptionI16::None,
278            us_default_char: OptionU16::None,
279            us_break_char: OptionU16::None,
280            us_max_context: OptionU16::None,
281
282            // OS/2 version 3 fields (optional)
283            us_lower_optical_point_size: OptionU16::None,
284            us_upper_optical_point_size: OptionU16::None,
285
286            // HEAD table fields
287            units_per_em: pdf.units_per_em,
288            font_flags: pdf.font_flags,
289            x_min: pdf.x_min,
290            y_min: pdf.y_min,
291            x_max: pdf.x_max,
292            y_max: pdf.y_max,
293
294            // HHEA table fields
295            ascender: pdf.ascender,
296            descender: pdf.descender,
297            line_gap: pdf.line_gap,
298            advance_width_max: pdf.advance_width_max,
299            min_left_side_bearing: 0,  // Not in PdfFontMetrics
300            min_right_side_bearing: 0, // Not in PdfFontMetrics
301            x_max_extent: 0,           // Not in PdfFontMetrics
302            caret_slope_rise: pdf.caret_slope_rise,
303            caret_slope_run: pdf.caret_slope_run,
304            caret_offset: 0,  // Not in PdfFontMetrics
305            num_h_metrics: 0, // Not in PdfFontMetrics
306
307            // OS/2 table fields
308            x_avg_char_width: pdf.x_avg_char_width,
309            us_weight_class: pdf.us_weight_class,
310            us_width_class: pdf.us_width_class,
311            fs_type: 0,                // Not in PdfFontMetrics
312            y_subscript_x_size: 0,     // Not in PdfFontMetrics
313            y_subscript_y_size: 0,     // Not in PdfFontMetrics
314            y_subscript_x_offset: 0,   // Not in PdfFontMetrics
315            y_subscript_y_offset: 0,   // Not in PdfFontMetrics
316            y_superscript_x_size: 0,   // Not in PdfFontMetrics
317            y_superscript_y_size: 0,   // Not in PdfFontMetrics
318            y_superscript_x_offset: 0, // Not in PdfFontMetrics
319            y_superscript_y_offset: 0, // Not in PdfFontMetrics
320            y_strikeout_size: pdf.y_strikeout_size,
321            y_strikeout_position: pdf.y_strikeout_position,
322            s_family_class: 0, // Not in PdfFontMetrics
323            fs_selection: 0,        // Not in PdfFontMetrics
324            us_first_char_index: 0, // Not in PdfFontMetrics
325            us_last_char_index: 0,  // Not in PdfFontMetrics
326
327            // Panose (align 1 - last)
328            panose: azul_css::props::basic::Panose::zero(),
329        }
330    }
331}
332
333// ParsedFont helper method for FontRef
334//
335// This allows ParsedFont to create glyphs that use FontRef
336//
337// FontRef is just a C-style Arc wrapper around ParsedFont, so we delegate to
338// the common shaping implementation and convert the font reference type.
339
340impl ParsedFont {
341    /// Internal helper that shapes text and returns Glyph
342    /// Delegates to shape_text_internal and converts the font reference.
343    fn shape_text_for_font_ref(
344        &self,
345        font_ref: &FontRef,
346        text: &str,
347        script: Script,
348        language: crate::text3::script::Language,
349        direction: BidiDirection,
350        style: &StyleProperties,
351    ) -> Result<Vec<Glyph>, LayoutError> {
352        // Use the common shaping implementation
353        let shaped = shape_text_internal(self, text, script, language, direction, style)?;
354
355        // Convert Glyph - now using font_hash and font_metrics instead of font reference
356        let font_hash = font_ref.get_hash();
357        let font_metrics = LayoutFontMetrics {
358            ascent: self.font_metrics.ascent,
359            descent: self.font_metrics.descent,
360            line_gap: self.font_metrics.line_gap,
361            units_per_em: self.font_metrics.units_per_em,
362            x_height: self.font_metrics.x_height,
363            cap_height: self.font_metrics.cap_height,
364        };
365
366        Ok(shaped
367            .into_iter()
368            .map(|g| Glyph {
369                glyph_id: g.glyph_id,
370                codepoint: g.codepoint,
371                font_hash,
372                font_metrics: font_metrics.clone(),
373                style: g.style,
374                source: g.source,
375                logical_byte_index: g.logical_byte_index,
376                logical_byte_len: g.logical_byte_len,
377                content_index: g.content_index,
378                cluster: g.cluster,
379                advance: g.advance,
380                kerning: g.kerning,
381                offset: g.offset,
382                vertical_advance: g.vertical_advance,
383                vertical_origin_y: g.vertical_origin_y,
384                vertical_bearing: g.vertical_bearing,
385                orientation: g.orientation,
386                script: g.script,
387                bidi_level: g.bidi_level,
388            })
389            .collect())
390    }
391
392    fn get_hash(&self) -> u64 {
393        self.hash
394    }
395
396    fn get_glyph_size(&self, glyph_id: u16, font_size_px: f32) -> Option<LogicalSize> {
397        self.get_or_decode_glyph(glyph_id).map(|record| {
398            let units_per_em = self.font_metrics.units_per_em as f32;
399            let scale_factor = if units_per_em > 0.0 {
400                font_size_px / units_per_em
401            } else {
402                FALLBACK_SCALE
403            };
404
405            // max_x, max_y, min_x, min_y in font units
406            let bbox = &record.bounding_box;
407
408            LogicalSize {
409                width: (bbox.max_x - bbox.min_x) as f32 * scale_factor,
410                height: (bbox.max_y - bbox.min_y) as f32 * scale_factor,
411            }
412        })
413    }
414
415    fn get_hyphen_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
416        let glyph_id = self.lookup_glyph_index('-' as u32)?;
417        let advance_units = self.get_horizontal_advance(glyph_id);
418        let scale_factor = if self.font_metrics.units_per_em > 0 {
419            font_size / (self.font_metrics.units_per_em as f32)
420        } else {
421            return None;
422        };
423        let scaled_advance = advance_units as f32 * scale_factor;
424        Some((glyph_id, scaled_advance))
425    }
426
427    fn get_kashida_glyph_and_advance(&self, font_size: f32) -> Option<(u16, f32)> {
428        // U+0640 is the Arabic Tatweel character, used for kashida justification.
429        let glyph_id = self.lookup_glyph_index('\u{0640}' as u32)?;
430        let advance_units = self.get_horizontal_advance(glyph_id);
431        let scale_factor = if self.font_metrics.units_per_em > 0 {
432            font_size / (self.font_metrics.units_per_em as f32)
433        } else {
434            return None;
435        };
436        let scaled_advance = advance_units as f32 * scale_factor;
437        Some((glyph_id, scaled_advance))
438    }
439}
440
/// Fallback scale factor when `units_per_em` is zero (corrupt/broken font).
///
/// Used by `ParsedFont::get_glyph_size` in place of
/// `font_size_px / units_per_em`, so a glyph's bounding box is multiplied
/// by this constant instead of triggering a division by zero.
const FALLBACK_SCALE: f32 = 0.01;
443
444// Helper Functions
445
446/// Builds a FeatureMask with the appropriate OpenType features for a given script.
447/// This ensures proper text shaping for complex scripts like Arabic, Devanagari, etc.
448///
449/// The function includes:
450/// - Common features for all scripts (ligatures, contextual alternates, etc.)
451/// - Script-specific features (positional forms for Arabic, conjuncts for Indic, etc.)
452///
453/// This is designed to be stable and explicit - we control exactly which features
454/// are enabled rather than relying on allsorts' defaults which may change.
455fn build_feature_mask_for_script(script: Script) -> FeatureMask {
456    use Script::*;
457
458    // Start with common features that apply to most scripts
459    let mut mask = FeatureMask::default(); // Includes: CALT, CCMP, CLIG, LIGA, LOCL, RLIG
460
461    // Add script-specific features
462    match script {
463        // Arabic and related scripts - require positional forms
464        Arabic => {
465            mask |= FeatureMask::INIT; // Initial forms (at start of word)
466            mask |= FeatureMask::MEDI; // Medial forms (middle of word)
467            mask |= FeatureMask::FINA; // Final forms (end of word)
468            mask |= FeatureMask::ISOL; // Isolated forms (standalone)
469                                       // Note: RLIG (required ligatures) already in default for
470                                       // lam-alef ligatures
471        }
472
473        // Indic scripts - require complex conjunct formation and reordering
474        Devanagari | Bengali | Gujarati | Gurmukhi | Kannada | Malayalam | Oriya | Tamil
475        | Telugu => {
476            mask |= FeatureMask::NUKT; // Nukta forms
477            mask |= FeatureMask::AKHN; // Akhand ligatures
478            mask |= FeatureMask::RPHF; // Reph form
479            mask |= FeatureMask::RKRF; // Rakar form
480            mask |= FeatureMask::PREF; // Pre-base forms
481            mask |= FeatureMask::BLWF; // Below-base forms
482            mask |= FeatureMask::ABVF; // Above-base forms
483            mask |= FeatureMask::HALF; // Half forms
484            mask |= FeatureMask::PSTF; // Post-base forms
485            mask |= FeatureMask::VATU; // Vattu variants
486            mask |= FeatureMask::CJCT; // Conjunct forms
487        }
488
489        // Myanmar (Burmese) - has complex reordering
490        Myanmar => {
491            mask |= FeatureMask::PREF; // Pre-base forms
492            mask |= FeatureMask::BLWF; // Below-base forms
493            mask |= FeatureMask::PSTF; // Post-base forms
494        }
495
496        // Khmer - has complex reordering and stacking
497        Khmer => {
498            mask |= FeatureMask::PREF; // Pre-base forms
499            mask |= FeatureMask::BLWF; // Below-base forms
500            mask |= FeatureMask::ABVF; // Above-base forms
501            mask |= FeatureMask::PSTF; // Post-base forms
502        }
503
504        // Thai - has tone marks and vowel reordering
505        Thai => {
506            // Thai mostly uses default features, but may have some special marks
507            // The default mask is sufficient for most Thai fonts
508        }
509
510        // Hebrew - may have contextual forms but less complex than Arabic
511        Hebrew => {
512            // Hebrew fonts may use contextual alternates already in default
513            // Some fonts have special features but they're rare
514        }
515
516        // Hangul (Korean) - has complex syllable composition
517        Hangul => {
518            // Note: Hangul jamo features (LJMO, VJMO, TJMO) are not available in allsorts'
519            // FeatureMask Most modern Hangul fonts work correctly with the default
520            // features as syllable composition is usually handled at a lower level
521        }
522
523        // Ethiopic - has syllabic script with some ligatures
524        Ethiopic => {
525            // Default features are usually sufficient
526            // LIGA and CLIG already in default mask
527        }
528
529        // Latin, Greek, Cyrillic - standard features are sufficient
530        Latin | Greek | Cyrillic => {
531            // Default mask includes all needed features:
532            // - LIGA: standard ligatures (fi, fl, etc.)
533            // - CLIG: contextual ligatures
534            // - CALT: contextual alternates
535            // - CCMP: mark composition
536        }
537
538        // Georgian - uses standard features
539        Georgian => {
540            // Default features sufficient
541        }
542
543        // CJK scripts (Hiragana, Katakana, Mandarin/Hani)
544        Hiragana | Katakana | Mandarin => {
545            // CJK fonts may use vertical alternates, but those are controlled
546            // by writing-mode, not GSUB features in the horizontal direction.
547            // Default features are sufficient.
548        }
549
550        // Sinhala - Indic-derived but simpler
551        Sinhala => {
552            mask |= FeatureMask::AKHN; // Akhand ligatures
553            mask |= FeatureMask::RPHF; // Reph form
554            mask |= FeatureMask::VATU; // Vattu variants
555        }
556    }
557
558    mask
559}
560
561/// Maps the layout engine's `Script` enum to an OpenType script tag `u32`.
562fn to_opentype_script_tag(script: Script) -> u32 {
563    use Script::*;
564    // Tags from https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
565    match script {
566        Arabic => u32::from_be_bytes(*b"arab"),
567        Bengali => u32::from_be_bytes(*b"beng"),
568        Cyrillic => u32::from_be_bytes(*b"cyrl"),
569        Devanagari => u32::from_be_bytes(*b"deva"),
570        Ethiopic => u32::from_be_bytes(*b"ethi"),
571        Georgian => u32::from_be_bytes(*b"geor"),
572        Greek => u32::from_be_bytes(*b"grek"),
573        Gujarati => u32::from_be_bytes(*b"gujr"),
574        Gurmukhi => u32::from_be_bytes(*b"guru"),
575        Hangul => u32::from_be_bytes(*b"hang"),
576        Hebrew => u32::from_be_bytes(*b"hebr"),
577        // OpenType does not define a separate Hiragana script tag;
578        // both Hiragana and Katakana intentionally use "kana".
579        Hiragana => u32::from_be_bytes(*b"kana"),
580        Kannada => u32::from_be_bytes(*b"knda"),
581        Katakana => u32::from_be_bytes(*b"kana"),
582        Khmer => u32::from_be_bytes(*b"khmr"),
583        Latin => u32::from_be_bytes(*b"latn"),
584        Malayalam => u32::from_be_bytes(*b"mlym"),
585        Mandarin => u32::from_be_bytes(*b"hani"),
586        Myanmar => u32::from_be_bytes(*b"mymr"),
587        Oriya => u32::from_be_bytes(*b"orya"),
588        Sinhala => u32::from_be_bytes(*b"sinh"),
589        Tamil => u32::from_be_bytes(*b"taml"),
590        Telugu => u32::from_be_bytes(*b"telu"),
591        Thai => u32::from_be_bytes(*b"thai"),
592    }
593}
594
/// Parses a font-feature setting of the form `"liga"`, `"liga=0"`, or `"ss01"`.
///
/// The text before the first `=` is the feature tag and the text after it
/// is the value; both are trimmed. A missing value defaults to `1` (on).
/// Anything after a second `=` is ignored.
///
/// # Returns
///
/// `Some((tag, value))`, where `tag` is the OpenType tag packed big-endian
/// into a `u32`. Tags shorter than four characters are padded with
/// trailing spaces.
///
/// `None` when:
/// - the tag is empty or longer than four bytes,
/// - the tag contains anything other than printable, non-space ASCII
///   (OpenType tags are one to four such characters, space-padded), or
/// - the value is not a valid `u32`.
fn parse_font_feature(feature_str: &str) -> Option<(u32, u32)> {
    let mut parts = feature_str.split('=');
    let tag_str = parts.next()?.trim();
    let value_str = parts.next().map_or("1", str::trim); // Default to 1 (on) if no value

    // Validate before packing: an empty tag would otherwise turn into an
    // all-spaces tag, and interior spaces / control characters would be
    // packed verbatim.
    let tag_bytes = tag_str.as_bytes();
    if tag_bytes.is_empty()
        || tag_bytes.len() > 4
        || !tag_bytes.iter().all(u8::is_ascii_graphic)
    {
        return None;
    }

    // Pad with trailing spaces up to the fixed four-byte tag width.
    let mut padded = [b' '; 4];
    padded[..tag_bytes.len()].copy_from_slice(tag_bytes);

    let value = value_str.parse::<u32>().ok()?;

    Some((u32::from_be_bytes(padded), value))
}
614
615/// A helper to add OpenType features based on CSS `font-variant-*` properties.
616fn add_variant_features(style: &StyleProperties, features: &mut Vec<FeatureInfo>) {
617    // Helper to add a feature that is simply "on".
618    let mut add_on = |tag_str: &[u8; 4]| {
619        features.push(FeatureInfo {
620            feature_tag: u32::from_be_bytes(*tag_str),
621            alternate: None,
622        });
623    };
624
625    // Note on disabling features: The CSS properties `font-variant-ligatures: none` or
626    // `no-common-ligatures` are meant to disable features that may be on by default for a
627    // given script. The `allsorts` API for applying custom features is additive and does not
628    // currently support disabling default features. This implementation only handles enabling
629    // non-default features.
630
631    // Ligatures
632    match style.font_variant_ligatures {
633        FontVariantLigatures::Discretionary => add_on(b"dlig"),
634        FontVariantLigatures::Historical => add_on(b"hlig"),
635        FontVariantLigatures::Contextual => add_on(b"calt"),
636        _ => {} // Other cases are either default-on or require disabling.
637    }
638
639    // Caps
640    match style.font_variant_caps {
641        FontVariantCaps::SmallCaps => add_on(b"smcp"),
642        FontVariantCaps::AllSmallCaps => {
643            add_on(b"c2sc");
644            add_on(b"smcp");
645        }
646        FontVariantCaps::PetiteCaps => add_on(b"pcap"),
647        FontVariantCaps::AllPetiteCaps => {
648            add_on(b"c2pc");
649            add_on(b"pcap");
650        }
651        FontVariantCaps::Unicase => add_on(b"unic"),
652        FontVariantCaps::TitlingCaps => add_on(b"titl"),
653        FontVariantCaps::Normal => {}
654    }
655
656    // Numeric
657    match style.font_variant_numeric {
658        FontVariantNumeric::LiningNums => add_on(b"lnum"),
659        FontVariantNumeric::OldstyleNums => add_on(b"onum"),
660        FontVariantNumeric::ProportionalNums => add_on(b"pnum"),
661        FontVariantNumeric::TabularNums => add_on(b"tnum"),
662        FontVariantNumeric::DiagonalFractions => add_on(b"frac"),
663        FontVariantNumeric::StackedFractions => add_on(b"afrc"),
664        FontVariantNumeric::Ordinal => add_on(b"ordn"),
665        FontVariantNumeric::SlashedZero => add_on(b"zero"),
666        FontVariantNumeric::Normal => {}
667    }
668}
669
/// Maps the `hyphenation::Language` enum to an OpenType language-system
/// tag, packed big-endian into a `u32`.
///
/// OpenType language-system tags are their own registry and frequently
/// differ from the ISO 639 code of the same language (e.g. Welsh is
/// `WEL `, not `CYM `). Tags are always four bytes, space-padded.
#[cfg(feature = "text_layout_hyphenation")]
fn to_opentype_lang_tag(lang: hyphenation::Language) -> u32 {
    use hyphenation::Language::*;
    // A complete list of language tags can be found at:
    // https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
    let tag_bytes = match lang {
        Afrikaans => *b"AFK ",
        Albanian => *b"SQI ",
        Armenian => *b"HYE ",
        Assamese => *b"ASM ",
        Basque => *b"EUQ ",
        Belarusian => *b"BEL ",
        Bengali => *b"BEN ",
        Bulgarian => *b"BGR ",
        Catalan => *b"CAT ",
        Chinese => *b"ZHS ",
        Coptic => *b"COP ",
        Croatian => *b"HRV ",
        Czech => *b"CSY ",
        Danish => *b"DAN ",
        Dutch => *b"NLD ",
        // The registry has a single English tag; `ENU` is a Windows locale
        // abbreviation, not an OpenType language tag.
        EnglishGB => *b"ENG ",
        EnglishUS => *b"ENG ",
        // Esperanto is `NTO `; `ESP ` is Spanish.
        Esperanto => *b"NTO ",
        Estonian => *b"ETI ",
        // NOTE(review): `ETH ` does not appear to be a registered tag;
        // unmatched tags fall back to the font's default language system.
        Ethiopic => *b"ETH ",
        Finnish => *b"FIN ",
        FinnishScholastic => *b"FIN ",
        French => *b"FRA ",
        Friulan => *b"FRL ",
        Galician => *b"GAL ",
        Georgian => *b"KAT ",
        German1901 => *b"DEU ",
        German1996 => *b"DEU ",
        // NOTE(review): `DES ` and `GRC ` below do not appear to be
        // registered tags either — confirm the intended mapping.
        GermanSwiss => *b"DES ",
        GreekAncient => *b"GRC ",
        GreekMono => *b"ELL ",
        GreekPoly => *b"ELL ",
        Gujarati => *b"GUJ ",
        Hindi => *b"HIN ",
        Hungarian => *b"HUN ",
        Icelandic => *b"ISL ",
        Indonesian => *b"IND ",
        Interlingua => *b"INA ",
        Irish => *b"IRI ",
        Italian => *b"ITA ",
        Kannada => *b"KAN ",
        Kurmanji => *b"KUR ",
        Latin => *b"LAT ",
        LatinClassic => *b"LAT ",
        LatinLiturgical => *b"LAT ",
        Latvian => *b"LVI ",
        Lithuanian => *b"LTH ",
        Macedonian => *b"MKD ",
        Malayalam => *b"MAL ",
        Marathi => *b"MAR ",
        Mongolian => *b"MNG ",
        NorwegianBokmal => *b"NOR ",
        NorwegianNynorsk => *b"NYN ",
        Occitan => *b"OCI ",
        Oriya => *b"ORI ",
        Pali => *b"PAL ",
        Panjabi => *b"PAN ",
        Piedmontese => *b"PMS ",
        Polish => *b"PLK ",
        Portuguese => *b"PTG ",
        Romanian => *b"ROM ",
        Romansh => *b"RMS ",
        Russian => *b"RUS ",
        Sanskrit => *b"SAN ",
        SerbianCyrillic => *b"SRB ",
        // NOTE(review): `SHC ` / `SHL ` do not appear to be registered
        // tags — confirm the intended mapping.
        SerbocroatianCyrillic => *b"SHC ",
        SerbocroatianLatin => *b"SHL ",
        SlavonicChurch => *b"CSL ",
        Slovak => *b"SKY ",
        Slovenian => *b"SLV ",
        Spanish => *b"ESP ",
        Swedish => *b"SVE ",
        Tamil => *b"TAM ",
        Telugu => *b"TEL ",
        Thai => *b"THA ",
        Turkish => *b"TRK ",
        Turkmen => *b"TKM ",
        Ukrainian => *b"UKR ",
        Uppersorbian => *b"USB ",
        Welsh => *b"WEL ",
    };
    u32::from_be_bytes(tag_bytes)
}
760
761/// Internal shaping implementation - the single source of truth for text shaping.
762/// Both FontRef and ParsedFont use this function.
763fn shape_text_internal(
764    parsed_font: &ParsedFont,
765    text: &str,
766    script: Script,
767    language: crate::text3::script::Language,
768    direction: BidiDirection,
769    style: &StyleProperties,
770) -> Result<Vec<Glyph>, LayoutError> {
771    let script_tag = to_opentype_script_tag(script);
772    #[cfg(feature = "text_layout_hyphenation")]
773    let lang_tag = to_opentype_lang_tag(language);
774    #[cfg(not(feature = "text_layout_hyphenation"))]
775    let lang_tag = 0u32;
776
777    // +spec:text-alignment-spacing:4357e6 - non-zero letter-spacing should disable optional ligatures; allsorts API is additive-only so default liga cannot be disabled here
778    // +spec:text-alignment-spacing:24d624 - cursive script letter-spacing behavior is advisory (outside CSS scope per spec note)
779    let mut user_features: Vec<FeatureInfo> = style
780        .font_features
781        .iter()
782        .filter_map(|s| parse_font_feature(s))
783        .map(|(tag, value)| FeatureInfo {
784            feature_tag: tag,
785            alternate: if value > 1 {
786                Some(value as usize)
787            } else {
788                None
789            },
790        })
791        .collect();
792    add_variant_features(style, &mut user_features);
793
794    let opt_gdef = parsed_font.opt_gdef_table.as_ref().map(|v| &**v);
795
796    let mut raw_glyphs: Vec<allsorts::gsub::RawGlyph<()>> = text
797        .char_indices()
798        .filter_map(|(cluster, ch)| {
799            let glyph_index = parsed_font.lookup_glyph_index(ch as u32).unwrap_or(0);
800            if cluster > u16::MAX as usize {
801                None
802            } else {
803                Some(allsorts::gsub::RawGlyph {
804                    unicodes: tinyvec::tiny_vec![[char; 1] => ch],
805                    glyph_index,
806                    liga_component_pos: cluster as u16,
807                    glyph_origin: allsorts::gsub::GlyphOrigin::Char(ch),
808                    flags: allsorts::gsub::RawGlyphFlags::empty(),
809                    extra_data: (),
810                    variation: None,
811                })
812            }
813        })
814        .collect();
815
816    if let Some(gsub) = parsed_font.gsub() {
817        let features = if user_features.is_empty() {
818            Features::Mask(build_feature_mask_for_script(script))
819        } else {
820            Features::Custom(user_features.clone())
821        };
822
823        let dotted_circle_index = parsed_font
824            .lookup_glyph_index(allsorts::DOTTED_CIRCLE as u32)
825            .unwrap_or(0);
826        gsub::apply(
827            dotted_circle_index,
828            gsub,
829            opt_gdef,
830            script_tag,
831            Some(lang_tag),
832            &features,
833            None,
834            parsed_font.num_glyphs(),
835            &mut raw_glyphs,
836        )
837        .map_err(|e| LayoutError::ShapingError(e.to_string()))?;
838    }
839
840    let mut infos = gpos::Info::init_from_glyphs(opt_gdef, raw_glyphs);
841
842    if let Some(gpos) = parsed_font.gpos() {
843        let kern_table = parsed_font
844            .opt_kern_table
845            .as_ref()
846            .map(|kt| kt.as_borrowed());
847        let apply_kerning = true; // Always enable GPOS kern feature (not just when legacy kern table exists)
848        gpos::apply(
849            gpos,
850            opt_gdef,
851            kern_table,
852            apply_kerning,
853            &Features::Custom(user_features),
854            None,
855            script_tag,
856            Some(lang_tag),
857            &mut infos,
858        )
859        .map_err(|e| LayoutError::ShapingError(e.to_string()))?;
860    }
861
862    let font_size = style.font_size_px;
863    let scale_factor = if parsed_font.font_metrics.units_per_em > 0 {
864        font_size / (parsed_font.font_metrics.units_per_em as f32)
865    } else {
866        FALLBACK_SCALE
867    };
868
869    let font_hash = parsed_font.get_hash();
870    let font_metrics = LayoutFontMetrics {
871        ascent: parsed_font.font_metrics.ascent,
872        descent: parsed_font.font_metrics.descent,
873        line_gap: parsed_font.font_metrics.line_gap,
874        units_per_em: parsed_font.font_metrics.units_per_em,
875        x_height: parsed_font.font_metrics.x_height,
876        cap_height: parsed_font.font_metrics.cap_height,
877    };
878    let style_arc = Arc::new(style.clone());
879    let bidi_level = BidiLevel::new(if direction.is_rtl() { 1 } else { 0 });
880
881    let mut shaped_glyphs = Vec::new();
882    for info in infos.iter() {
883        let cluster = info.glyph.liga_component_pos as u32;
884        let source_char = text
885            .get(cluster as usize..)
886            .and_then(|s| s.chars().next())
887            .unwrap_or('\u{FFFD}');
888
889        let base_advance = parsed_font.get_horizontal_advance(info.glyph.glyph_index);
890        // Use hinted advance width when available (matches FreeType/Chrome behavior)
891        let ppem = font_size.round() as u16;
892        let advance = parsed_font
893            .get_hinted_advance_px(info.glyph.glyph_index, ppem)
894            .unwrap_or(base_advance as f32 * scale_factor);
895        let kerning = info.kerning as f32 * scale_factor;
896
897        let (offset_x_units, offset_y_units) =
898            if let allsorts::gpos::Placement::Distance(x, y) = info.placement {
899                (x, y)
900            } else {
901                (0, 0)
902            };
903        let offset_x = offset_x_units as f32 * scale_factor;
904        let offset_y = offset_y_units as f32 * scale_factor;
905
906        let vert = parsed_font.get_vertical_metrics(info.glyph.glyph_index);
907        let glyph = Glyph {
908            glyph_id: info.glyph.glyph_index,
909            codepoint: source_char,
910            font_hash,
911            font_metrics: font_metrics.clone(),
912            style: Arc::clone(&style_arc),
913            source: GlyphSource::Char,
914            logical_byte_index: cluster as usize,
915            logical_byte_len: source_char.len_utf8(),
916            content_index: 0,
917            cluster,
918            advance,
919            kerning,
920            offset: Point {
921                x: offset_x,
922                y: offset_y,
923            },
924            vertical_advance: vert.as_ref().map(|v| v.advance * font_size).unwrap_or(0.0),
925            vertical_origin_y: vert.as_ref().map(|v| v.origin_y * font_size).unwrap_or(0.0),
926            vertical_bearing: vert
927                .map(|v| Point { x: v.bearing_x * font_size, y: v.bearing_y * font_size })
928                .unwrap_or(Point { x: 0.0, y: 0.0 }),
929            orientation: GlyphOrientation::Horizontal,
930            script,
931            bidi_level,
932        };
933        shaped_glyphs.push(glyph);
934    }
935
936    Ok(shaped_glyphs)
937}
938
939/// Public helper function to shape text for ParsedFont, returning Glyph
940/// This is used by the ParsedFontTrait implementation for ParsedFont
941pub fn shape_text_for_parsed_font(
942    parsed_font: &ParsedFont,
943    text: &str,
944    script: Script,
945    language: crate::text3::script::Language,
946    direction: BidiDirection,
947    style: &StyleProperties,
948) -> Result<Vec<Glyph>, LayoutError> {
949    // Delegate to the single internal implementation
950    shape_text_internal(parsed_font, text, script, language, direction, style)
951}