Skip to main content

zenith_layout/
rustybuzz_engine.rs

1//! `rustybuzz`-backed shaping engine for Zenith.
2//!
3//! This is the ONLY module in the crate that imports `rustybuzz` or
4//! `rustybuzz::ttf_parser`. No third-party type escapes to a public signature.
5
6use std::collections::BTreeSet;
7
8use zenith_core::FontProvider;
9
10use crate::engine::{
11    FallbackResult, PositionedGlyph, ShapeRequest, TextDirection, TextLayoutEngine, ZenithGlyphRun,
12};
13use crate::error::LayoutError;
14
/// Returns `true` for code points that legitimately have no standalone glyph
/// and therefore must NOT be reported as missing from font coverage.
///
/// Three groups qualify:
/// - control characters (`char::is_control`),
/// - whitespace (`char::is_whitespace`),
/// - the Unicode `Default_Ignorable_Code_Point` set: joiners, bidi marks and
///   isolates, variation selectors, fillers, format controls, tags, BOM, soft
///   hyphen, word joiner, etc.
///
/// The default-ignorable list follows the Unicode derived core property, so
/// characters such as U+061C ARABIC LETTER MARK or U+034F COMBINING GRAPHEME
/// JOINER are not flagged as missing when no face maps them.
fn is_ignorable_for_coverage(ch: char) -> bool {
    ch.is_control()
        || ch.is_whitespace()
        || matches!(
            u32::from(ch),
            0x00AD                // soft hyphen
            | 0x034F              // combining grapheme joiner
            | 0x061C              // Arabic letter mark
            | 0x115F..=0x1160     // Hangul choseong / jungseong fillers
            | 0x17B4..=0x17B5     // Khmer inherent vowels
            | 0x180B..=0x180F     // Mongolian free variation selectors, vowel separator
            | 0x200B..=0x200F     // ZWSP, ZWNJ, ZWJ, LRM, RLM
            | 0x202A..=0x202E     // bidi embeddings/overrides
            | 0x2060..=0x206F     // word joiner, invisible operators, bidi isolates
            | 0x3164              // Hangul filler
            | 0xFE00..=0xFE0F     // variation selectors
            | 0xFEFF              // BOM / ZWNBSP
            | 0xFFA0              // halfwidth Hangul filler
            | 0xFFF0..=0xFFF8     // reserved default-ignorable code points
            | 0x1BCA0..=0x1BCA3   // shorthand format controls
            | 0x1D173..=0x1D17A   // musical symbol format controls
            | 0xE0000..=0xE0FFF   // tags, variation selectors supplement, reserved
        )
}
33
/// Shaping engine that delegates to `rustybuzz` (a HarfBuzz port) and
/// `rustybuzz::ttf_parser`.
///
/// The type is a field-less unit struct, so it carries no state: build one and
/// reuse it for any number of `shape` calls.
#[derive(Clone, Debug)]
pub struct RustybuzzEngine;

impl RustybuzzEngine {
    /// Build a `RustybuzzEngine`.
    #[must_use]
    pub fn new() -> Self {
        RustybuzzEngine
    }
}

impl Default for RustybuzzEngine {
    /// Same as [`RustybuzzEngine::new`].
    fn default() -> Self {
        RustybuzzEngine
    }
}
53
54impl RustybuzzEngine {
55    /// Shape `text` with an already-parsed `face` and produce a single
56    /// [`ZenithGlyphRun`] tagged with `font_id`.
57    ///
58    /// This is the one place shaping, scaling, and metric derivation live, so
59    /// `shape` (single-face) and `shape_with_fallback` (per-glyph fallback)
60    /// cannot diverge: both route every run through here. Glyphs are positioned
61    /// from `x = 0` within the run.
62    ///
63    /// # Errors
64    ///
65    /// Returns `LayoutError` if the face reports `units_per_em <= 0`.
66    fn shape_run_with_face(
67        face: &rustybuzz::Face<'_>,
68        text: &str,
69        font_id: String,
70        font_size: f32,
71        direction: TextDirection,
72    ) -> Result<ZenithGlyphRun, LayoutError> {
73        // ── Compute pixel scale ───────────────────────────────────────────────
74        // `units_per_em` comes from the `ttf_parser::Face` trait exposed by
75        // `rustybuzz::Face` via Deref.
76        let units_per_em = face.units_per_em();
77        if units_per_em <= 0 {
78            return Err(LayoutError::new(format!(
79                "font '{font_id}' reports units_per_em = {units_per_em}"
80            )));
81        }
82        // `units_per_em` is a positive `i32` (guarded above); the OTF spec
83        // range (16–16384) is exactly representable as `f32`.
84        let scale = font_size / units_per_em as f32;
85
86        // ── Derive line metrics ───────────────────────────────────────────────
87        // `ascender` and `descender` are in font units; descender is negative.
88        let ascent = f32::from(face.ascender()) * scale;
89        let descent = -(f32::from(face.descender()) * scale); // store positive magnitude
90        let line_gap = f32::from(face.line_gap()) * scale;
91        let line_height = ascent + descent + line_gap;
92
93        // ── Shape the text ────────────────────────────────────────────────────
94        let mut buffer = rustybuzz::UnicodeBuffer::new();
95        buffer.push_str(text);
96        // RTL sets the buffer direction so rustybuzz reorders glyphs to visual
97        // order and applies RTL-correct joining (Arabic, Hebrew). The run's
98        // advance + glyph pen positions stay left-to-right, so a word emitted
99        // at its left x renders correctly; LTR is the default (unchanged).
100        buffer.set_direction(match direction {
101            TextDirection::Ltr => rustybuzz::Direction::LeftToRight,
102            TextDirection::Rtl => rustybuzz::Direction::RightToLeft,
103        });
104
105        // Shape with no extra features; deterministic across machines.
106        let glyph_buffer = rustybuzz::shape(face, &[], buffer);
107
108        let infos = glyph_buffer.glyph_infos();
109        let positions = glyph_buffer.glyph_positions();
110
111        // ── Cluster → source-text boundaries ──────────────────────────────────
112        // Each glyph carries `cluster`: the byte offset into `text` it derives
113        // from. The sorted, deduplicated set of cluster offsets gives the source
114        // substring boundaries: a cluster starting at offset `c` spans up to the
115        // next greater offset (or `text.len()`). The FIRST glyph of each cluster
116        // carries that whole substring (so a ligature's single glyph maps to all
117        // its chars); later glyphs of the same cluster carry the empty string (so
118        // a one-char→many-glyph decomposition is not duplicated). This per-glyph
119        // Unicode mapping is what the PDF backend turns into a ToUnicode CMap.
120        let mut boundaries: Vec<u32> = infos.iter().map(|i| i.cluster).collect();
121        boundaries.sort_unstable();
122        boundaries.dedup();
123        let cluster_text = |cluster: u32| -> String {
124            let start = cluster as usize;
125            let end = match boundaries.binary_search(&cluster) {
126                Ok(i) => boundaries.get(i + 1).map_or(text.len(), |&b| b as usize),
127                // A cluster value not in the set cannot happen (it was collected
128                // from the same infos); fall back to a single source char span.
129                Err(_) => text.len(),
130            };
131            text.get(start..end).unwrap_or("").to_string()
132        };
133
134        // ── Build glyph list ──────────────────────────────────────────────────
135        let mut glyphs: Vec<PositionedGlyph> = Vec::with_capacity(infos.len());
136        let mut pen_x: f32 = 0.0;
137        let mut pen_y: f32 = 0.0;
138        let mut prev_cluster: Option<u32> = None;
139
140        for (info, pos) in infos.iter().zip(positions.iter()) {
141            // glyph_id is u32 in rustybuzz; OTF glyph IDs fit in u16 (max 65535).
142            // A value above u16::MAX indicates a malformed font — map it to the
143            // .notdef glyph (0) rather than silently truncating.
144            let glyph_id = u16::try_from(info.glyph_id).unwrap_or(0);
145
146            let x = pen_x + pos.x_offset as f32 * scale;
147            // y_offset is in font units; positive = up in font coords → negative screen y.
148            let y = pen_y - pos.y_offset as f32 * scale;
149
150            // First glyph of a new cluster carries the source text; repeats are empty.
151            let glyph_text = if prev_cluster == Some(info.cluster) {
152                String::new()
153            } else {
154                cluster_text(info.cluster)
155            };
156            prev_cluster = Some(info.cluster);
157
158            glyphs.push(PositionedGlyph {
159                glyph_id,
160                x,
161                y,
162                text: glyph_text,
163            });
164
165            pen_x += pos.x_advance as f32 * scale;
166            pen_y += pos.y_advance as f32 * scale;
167        }
168
169        let advance_width = pen_x;
170
171        Ok(ZenithGlyphRun {
172            font_id,
173            font_size,
174            ascent,
175            descent,
176            line_height,
177            advance_width,
178            glyphs,
179        })
180    }
181}
182
183impl TextLayoutEngine for RustybuzzEngine {
184    fn shape(
185        &self,
186        req: &ShapeRequest<'_>,
187        provider: &dyn FontProvider,
188    ) -> Result<ZenithGlyphRun, LayoutError> {
189        // ── 1. Resolve font bytes ─────────────────────────────────────────────
190        let font_data = provider
191            .resolve(req.families, req.weight, req.style)
192            .ok_or_else(|| {
193                LayoutError::new(format!("no font resolved for families {:?}", req.families))
194            })?;
195
196        // ── 2. Parse the font face ────────────────────────────────────────────
197        let face =
198            rustybuzz::Face::from_slice(&font_data.bytes, font_data.index).ok_or_else(|| {
199                LayoutError::new(format!(
200                    "failed to parse font face for '{}' (index {})",
201                    font_data.id, font_data.index
202                ))
203            })?;
204
205        // ── 3. Shape via the shared single-face helper ────────────────────────
206        Self::shape_run_with_face(&face, req.text, font_data.id, req.font_size, req.direction)
207    }
208
209    fn shape_with_fallback(
210        &self,
211        req: &ShapeRequest<'_>,
212        provider: &dyn FontProvider,
213    ) -> Result<FallbackResult, LayoutError> {
214        // ── 1. Resolve + parse the PRIMARY face ───────────────────────────────
215        let primary_data = provider
216            .resolve(req.families, req.weight, req.style)
217            .ok_or_else(|| {
218                LayoutError::new(format!("no font resolved for families {:?}", req.families))
219            })?;
220        let primary_face = rustybuzz::Face::from_slice(&primary_data.bytes, primary_data.index)
221            .ok_or_else(|| {
222                LayoutError::new(format!(
223                    "failed to parse font face for '{}' (index {})",
224                    primary_data.id, primary_data.index
225                ))
226            })?;
227
228        // ── 2. Build an ordered, deduplicated face cache for coverage probing ─
229        // The primary occupies index 0; remaining faces follow in the
230        // deterministic order `provider.all_faces()` returns them, skipping any
231        // face that shares the primary's id (so the primary is never duplicated).
232        // Each entry parses once and is reused for every coverage check + shape.
233        // Bind the owned face data for the whole function so the parsed
234        // `Face`s below can borrow from it (they outlive a per-iteration temp).
235        let all_faces_data = provider.all_faces();
236        let mut faces: Vec<(String, rustybuzz::Face<'_>)> =
237            vec![(primary_data.id.clone(), primary_face)];
238        for fd in &all_faces_data {
239            if fd.id == primary_data.id {
240                continue;
241            }
242            // A face whose bytes fail to parse simply cannot cover any glyph;
243            // skip it rather than failing the whole shape.
244            if let Some(f) = rustybuzz::Face::from_slice(&fd.bytes, fd.index) {
245                faces.push((fd.id.clone(), f));
246            }
247        }
248
249        // Coverage test: does the face at `faces[idx]` have a glyph for `ch`?
250        // Uses ttf-parser's `Face::glyph_index`, exposed on `rustybuzz::Face`
251        // via Deref.
252        let covers = |idx: usize, ch: char| -> bool {
253            faces
254                .get(idx)
255                .is_some_and(|(_, f)| f.glyph_index(ch).is_some())
256        };
257
258        // ── 3. Itemize text into contiguous sub-runs by chosen face index ─────
259        // For each char: prefer the primary (index 0) when it covers the char;
260        // otherwise the FIRST non-primary face (lowest index ≥ 1) that covers
261        // it; otherwise the primary (so it shapes as .notdef / tofu, matching
262        // current behavior). Consecutive chars with the same chosen face merge.
263        // Sub-run boundaries are recorded as byte ranges into `req.text` so the
264        // exact substring is shaped (and reported as the run's source text).
265        // Chars that fall back to the primary (index 0) because NO face covers
266        // them are recorded in `missing` (unless ignorable).
267        let mut missing: BTreeSet<char> = BTreeSet::new();
268
269        // (face_idx, byte_start, byte_end) per sub-run, in text order.
270        let mut segments: Vec<(usize, usize, usize)> = Vec::new();
271        for (byte_off, ch) in req.text.char_indices() {
272            let idx = if covers(0, ch) {
273                0
274            } else {
275                let mut chosen = 0_usize;
276                for idx in 1..faces.len() {
277                    if covers(idx, ch) {
278                        chosen = idx;
279                        break;
280                    }
281                }
282                // chosen == 0 means no face covered it; record as missing.
283                if chosen == 0 && !is_ignorable_for_coverage(ch) {
284                    missing.insert(ch);
285                }
286                chosen
287            };
288            let ch_end = byte_off + ch.len_utf8();
289            match segments.last_mut() {
290                Some((last_idx, _, last_end)) if *last_idx == idx => {
291                    *last_end = ch_end;
292                }
293                _ => segments.push((idx, byte_off, ch_end)),
294            }
295        }
296
297        // Empty text → no segments; shape the empty string with the primary so
298        // a (degenerate but valid) run with primary metrics is still returned,
299        // matching `shape("")`.
300        if segments.is_empty() {
301            let (font_id, face) = faces.first().ok_or_else(|| {
302                LayoutError::new("internal: primary face missing from cache".to_owned())
303            })?;
304            return Ok(FallbackResult {
305                runs: vec![Self::shape_run_with_face(
306                    face,
307                    req.text,
308                    font_id.clone(),
309                    req.font_size,
310                    req.direction,
311                )?],
312                missing_chars: missing.into_iter().collect(),
313            });
314        }
315
316        // ── 4. Shape each sub-run with its chosen face ────────────────────────
317        // The all-primary case is exactly one segment at index 0 spanning the
318        // whole text → a single run byte-identical to `shape`, because both
319        // call `shape_run_with_face` with the same face, text, id, and size.
320        //
321        // Segments are itemized in LOGICAL (text) order. The returned runs are
322        // concatenated left-to-right by the caller, so for RTL the FIRST logical
323        // segment must sit rightmost: reverse the emission order. A single
324        // segment (the common all-primary case) is unaffected, and LTR keeps
325        // logical order — so both the LTR path and a single-run RTL word stay
326        // byte-identical.
327        if req.direction == TextDirection::Rtl {
328            segments.reverse();
329        }
330        let mut runs: Vec<ZenithGlyphRun> = Vec::with_capacity(segments.len());
331        for (idx, start, end) in segments {
332            let (font_id, face) = faces.get(idx).ok_or_else(|| {
333                LayoutError::new("internal: chosen face index out of range".to_owned())
334            })?;
335            let sub_text = req.text.get(start..end).ok_or_else(|| {
336                LayoutError::new("internal: sub-run byte range out of bounds".to_owned())
337            })?;
338            runs.push(Self::shape_run_with_face(
339                face,
340                sub_text,
341                font_id.clone(),
342                req.font_size,
343                req.direction,
344            )?);
345        }
346
347        Ok(FallbackResult {
348            runs,
349            missing_chars: missing.into_iter().collect(),
350        })
351    }
352}
353
354// ─────────────────────────────────────────────────────────────────────────────
355// Tests
356// ─────────────────────────────────────────────────────────────────────────────
357
358#[cfg(test)]
359mod tests {
360    use zenith_core::{FontStyle, default_provider};
361
362    use super::*;
363
364    fn shape_at(font_size: f32) -> Result<ZenithGlyphRun, LayoutError> {
365        let families = vec!["Noto Sans".to_string()];
366        let req = ShapeRequest {
367            text: "Hello Zenith",
368            families: &families,
369            weight: 400,
370            style: FontStyle::Normal,
371            font_size,
372            direction: TextDirection::Ltr,
373        };
374        let provider = default_provider();
375        RustybuzzEngine::new().shape(&req, &provider)
376    }
377
378    #[test]
379    fn shape_hello_zenith_at_24px() {
380        let run = shape_at(24.0).expect("shaping should succeed");
381
382        // font_id matches the registered stable id.
383        assert_eq!(run.font_id, "noto-sans-400-normal");
384
385        // Glyph count: "Hello Zenith" = 12 characters including the space.
386        assert!(
387            run.glyphs.len() >= 10,
388            "expected >= 10 glyphs, got {}",
389            run.glyphs.len()
390        );
391
392        // Metrics sanity.
393        assert!(
394            run.ascent > 0.0,
395            "ascent must be positive, got {}",
396            run.ascent
397        );
398        assert!(
399            run.advance_width > 0.0,
400            "advance_width must be positive, got {}",
401            run.advance_width
402        );
403
404        // Glyph x positions must be non-decreasing (monotonic pen advance).
405        let mut prev_x = f32::NEG_INFINITY;
406        for g in &run.glyphs {
407            assert!(
408                g.x >= prev_x - 1e-4,
409                "x positions must be non-decreasing: {} < {}",
410                g.x,
411                prev_x
412            );
413            prev_x = g.x;
414        }
415    }
416
417    #[test]
418    fn shaping_is_deterministic() {
419        let run1 = shape_at(24.0).expect("first shape");
420        let run2 = shape_at(24.0).expect("second shape");
421        assert_eq!(run1, run2, "shaping must be deterministic");
422    }
423
424    #[test]
425    fn unknown_family_returns_error() {
426        let families = vec!["Nonexistent".to_string()];
427        let req = ShapeRequest {
428            text: "test",
429            families: &families,
430            weight: 400,
431            style: FontStyle::Normal,
432            font_size: 16.0,
433            direction: TextDirection::Ltr,
434        };
435        let provider = default_provider();
436        let result = RustybuzzEngine::new().shape(&req, &provider);
437        assert!(result.is_err(), "unknown family must return Err");
438        let msg = result.unwrap_err().message;
439        assert!(
440            msg.contains("no font resolved"),
441            "error message should mention unresolved font, got: {msg}"
442        );
443    }
444
445    #[test]
446    fn fallback_all_primary_matches_single_shape() {
447        // CRITICAL byte-identity guarantee: text fully covered by the primary
448        // face must yield exactly ONE run identical to `shape()`.
449        let families = vec!["Noto Sans".to_string()];
450        let req = ShapeRequest {
451            text: "Hello Zenith 123!",
452            families: &families,
453            weight: 400,
454            style: FontStyle::Normal,
455            font_size: 24.0,
456            direction: TextDirection::Ltr,
457        };
458        let provider = default_provider();
459        let engine = RustybuzzEngine::new();
460
461        let single = engine.shape(&req, &provider).expect("single-run shape");
462        let result = engine
463            .shape_with_fallback(&req, &provider)
464            .expect("fallback shape");
465
466        assert_eq!(
467            result.runs.len(),
468            1,
469            "all-primary text must produce exactly one run"
470        );
471        assert_eq!(
472            result.runs.first().expect("one run"),
473            &single,
474            "all-primary fallback run must be byte-identical to shape()"
475        );
476        assert!(
477            result.missing_chars.is_empty(),
478            "fully-covered ASCII must have no missing chars"
479        );
480    }
481
482    #[test]
483    fn fallback_empty_text_matches_single_shape() {
484        // Degenerate empty input must still match `shape("")` (one run).
485        let families = vec!["Noto Sans".to_string()];
486        let req = ShapeRequest {
487            text: "",
488            families: &families,
489            weight: 400,
490            style: FontStyle::Normal,
491            font_size: 16.0,
492            direction: TextDirection::Ltr,
493        };
494        let provider = default_provider();
495        let engine = RustybuzzEngine::new();
496
497        let single = engine.shape(&req, &provider).expect("single empty shape");
498        let result = engine
499            .shape_with_fallback(&req, &provider)
500            .expect("fallback empty shape");
501        assert_eq!(
502            result.runs.len(),
503            1,
504            "empty text still yields one (degenerate) run"
505        );
506        assert_eq!(result.runs.first().expect("one run"), &single);
507    }
508
509    #[test]
510    fn fallback_unknown_primary_returns_error() {
511        // No resolvable primary → Err, exactly like `shape`.
512        let families = vec!["Nonexistent".to_string()];
513        let req = ShapeRequest {
514            text: "test",
515            families: &families,
516            weight: 400,
517            style: FontStyle::Normal,
518            font_size: 16.0,
519            direction: TextDirection::Ltr,
520        };
521        let provider = default_provider();
522        let result = RustybuzzEngine::new().shape_with_fallback(&req, &provider);
523        assert!(result.is_err(), "unknown primary family must return Err");
524    }
525
526    #[test]
527    fn fallback_is_deterministic() {
528        let families = vec!["Noto Sans".to_string()];
529        let req = ShapeRequest {
530            text: "Hi there",
531            families: &families,
532            weight: 400,
533            style: FontStyle::Normal,
534            font_size: 18.0,
535            direction: TextDirection::Ltr,
536        };
537        let provider = default_provider();
538        let engine = RustybuzzEngine::new();
539        let a = engine.shape_with_fallback(&req, &provider).expect("a");
540        let b = engine.shape_with_fallback(&req, &provider).expect("b");
541        assert_eq!(a.runs, b.runs, "fallback shaping must be deterministic");
542        assert_eq!(
543            a.missing_chars, b.missing_chars,
544            "missing_chars must be deterministic"
545        );
546    }
547
548    #[test]
549    fn rtl_reverses_visual_glyph_order() {
550        // For a non-joining script (Latin), RTL shaping reorders glyphs to
551        // visual (right-to-left) order: the RTL glyph_id sequence is the reverse
552        // of the LTR one, while the total advance stays positive and equal.
553        let families = vec!["Noto Sans".to_string()];
554        let provider = default_provider();
555        let engine = RustybuzzEngine::new();
556
557        let ltr = engine
558            .shape(
559                &ShapeRequest {
560                    text: "ABC",
561                    families: &families,
562                    weight: 400,
563                    style: FontStyle::Normal,
564                    font_size: 24.0,
565                    direction: TextDirection::Ltr,
566                },
567                &provider,
568            )
569            .expect("ltr shape");
570        let rtl = engine
571            .shape(
572                &ShapeRequest {
573                    text: "ABC",
574                    families: &families,
575                    weight: 400,
576                    style: FontStyle::Normal,
577                    font_size: 24.0,
578                    direction: TextDirection::Rtl,
579                },
580                &provider,
581            )
582            .expect("rtl shape");
583
584        let ltr_ids: Vec<u16> = ltr.glyphs.iter().map(|g| g.glyph_id).collect();
585        let mut rtl_ids: Vec<u16> = rtl.glyphs.iter().map(|g| g.glyph_id).collect();
586        rtl_ids.reverse();
587        assert_eq!(
588            ltr_ids, rtl_ids,
589            "RTL glyph order must be the visual reverse of LTR"
590        );
591        assert!(rtl.advance_width > 0.0, "RTL advance must be positive");
592        assert!(
593            (rtl.advance_width - ltr.advance_width).abs() < 1e-3,
594            "RTL and LTR total advance must match"
595        );
596    }
597
598    #[test]
599    fn rtl_shaping_is_deterministic() {
600        let families = vec!["Noto Sans".to_string()];
601        let provider = default_provider();
602        let engine = RustybuzzEngine::new();
603        let req = ShapeRequest {
604            text: "Shalom",
605            families: &families,
606            weight: 400,
607            style: FontStyle::Normal,
608            font_size: 20.0,
609            direction: TextDirection::Rtl,
610        };
611        let a = engine.shape(&req, &provider).expect("a");
612        let b = engine.shape(&req, &provider).expect("b");
613        assert_eq!(a, b, "RTL shaping must be deterministic");
614    }
615
616    #[test]
617    fn font_size_scaling_proportional() {
618        let run24 = shape_at(24.0).expect("24px");
619        let run48 = shape_at(48.0).expect("48px");
620
621        // Ascent should be ~2× when font_size doubles.
622        let ratio_ascent = run48.ascent / run24.ascent;
623        assert!(
624            (ratio_ascent - 2.0).abs() < 0.01,
625            "ascent ratio should be ~2.0, got {ratio_ascent}"
626        );
627
628        // advance_width should also be ~2×.
629        let ratio_adv = run48.advance_width / run24.advance_width;
630        assert!(
631            (ratio_adv - 2.0).abs() < 0.01,
632            "advance_width ratio should be ~2.0, got {ratio_adv}"
633        );
634    }
635}