Skip to main content

oxideav_scribe/
face_chain.rs

1//! `FaceChain` — ordered list of faces consulted in priority order
2//! when shaping. Round-2 fallback support: when the primary face
3//! doesn't have a glyph for a codepoint, the chain walks down the list
4//! until one does, falling back to the primary's `.notdef` only if no
5//! face provides a glyph.
6//!
7//! ## Design
8//!
9//! Each codepoint is mapped to `(face_idx, glyph_id)` independently;
10//! ligature substitution + kerning then run on the resulting glyph runs
11//! face-by-face (because GSUB / GPOS lookups are face-local — you can't
12//! ligate an "f" from face A with an "i" from face B).
13//!
14//! The fallback decision is "first face where `glyph_index` returns a
15//! non-zero, defined glyph". A returned `0` is treated as `.notdef`
16//! (i.e. *not present*) because the primary face's `.notdef` is what
17//! we'd use as the *final* fallback anyway, and skipping over it lets
18//! a fallback face provide a real glyph.
19//!
20//! ## Cache key impact
21//!
22//! `PositionedGlyph::face_idx` lets the rasterizer pick the right face
23//! out of the chain. The cache key already keys by `face_id` (per-face,
24//! globally unique), so a glyph from face[0] and a glyph from face[1]
25//! with the same numerical `glyph_id` never collide.
26
27use crate::face::Face;
28use crate::shaper::{shape_run_with_font, PositionedGlyph};
29use crate::shaping::arabic::{compute_forms, script_of, Script};
30use crate::shaping::arabic_pf::presentation_form;
31use crate::shaping::indic::{
32    cluster_boundaries_with, reorder_cluster_with, script_indic_tags, IndicCategory, ReorderRules,
33    BENGALI_RULES, DEVANAGARI_RULES, GUJARATI_RULES, GURMUKHI_RULES, KANNADA_RULES, KHMER_RULES,
34    MALAYALAM_RULES, ORIYA_RULES, SINHALA_RULES, TAMIL_RULES, TELUGU_RULES, THAI_RULES,
35};
36use crate::style::Style;
37use crate::Error;
38
/// Ordered chain of faces. Index 0 is the primary; index N is consulted
/// only if 0..N all returned `.notdef` for a codepoint.
#[derive(Debug)]
pub struct FaceChain {
    /// Faces in priority order; `faces[0]` is the primary. Chains built
    /// through [`FaceChain::new`] always hold at least one face, and
    /// [`FaceChain::push_fallback`] only ever appends.
    faces: Vec<Face>,
}
45
46impl FaceChain {
47    /// Build a chain from a single primary face. Use
48    /// [`FaceChain::push_fallback`] (chainable) to append fallbacks.
49    pub fn new(primary: Face) -> Self {
50        Self {
51            faces: vec![primary],
52        }
53    }
54
55    /// Append a fallback face to the end of the chain. Builder-style:
56    /// `FaceChain::new(latin).push_fallback(cjk).push_fallback(emoji)`.
57    #[must_use]
58    pub fn push_fallback(mut self, face: Face) -> Self {
59        self.faces.push(face);
60        self
61    }
62
63    /// Number of faces in the chain (including the primary).
64    pub fn len(&self) -> usize {
65        self.faces.len()
66    }
67
68    /// True if the chain has no faces — never the case for chains
69    /// constructed via [`FaceChain::new`], present only because clippy
70    /// rightly complains when `len()` exists alone.
71    pub fn is_empty(&self) -> bool {
72        self.faces.is_empty()
73    }
74
75    /// Borrow face at `idx`. Panics if `idx >= len()` — the rasterizer
76    /// always reads `face_idx` from a `PositionedGlyph` produced by
77    /// this chain so the index is bounded by construction.
78    pub fn face(&self, idx: u16) -> &Face {
79        &self.faces[idx as usize]
80    }
81
82    /// Borrow the primary face — useful for size/metric queries.
83    pub fn primary(&self) -> &Face {
84        &self.faces[0]
85    }
86
87    /// Mutably borrow face at `idx`. Used to flip per-face state like
88    /// variation coordinates without rebuilding the chain. Panics if
89    /// `idx >= len()`.
90    pub fn face_mut(&mut self, idx: usize) -> &mut Face {
91        &mut self.faces[idx]
92    }
93
94    /// Set the variation coordinates on the **primary face** (index 0).
95    /// Convenience wrapper around [`Face::set_variation_coords`] for
96    /// the common case of "shape this run at `wght=600 / wdth=125`".
97    /// Mirrors [`Face::set_variation_coords`]'s clamp + length cap and
98    /// returns its error variant unchanged.
99    ///
100    /// Fallback faces in the chain are NOT touched — call
101    /// [`FaceChain::face_mut`] explicitly if a fallback also needs
102    /// variation coords (rare in practice; fallback faces typically
103    /// cover a different script and are loaded from a static cut).
104    pub fn set_variation_coords(&mut self, coords: &[f32]) -> Result<(), Error> {
105        self.faces[0].set_variation_coords(coords)
106    }
107
108    /// Named instances published by the face at `face_index`. Empty
109    /// vec when the face is static / OTF, or when the index is out of
110    /// range. Mirrors [`Face::named_instances`] for the chosen face.
111    pub fn named_instances(&self, face_index: usize) -> Vec<crate::NamedInstance> {
112        self.faces
113            .get(face_index)
114            .map(|f| f.named_instances())
115            .unwrap_or_default()
116    }
117
118    /// Variation axes published by the face at `face_index`. Empty vec
119    /// when the face is static / OTF, or when the index is out of
120    /// range. Mirrors [`Face::variation_axes`] for the chosen face.
121    pub fn variation_axes(&self, face_index: usize) -> Vec<crate::VariationAxis> {
122        self.faces
123            .get(face_index)
124            .map(|f| f.variation_axes())
125            .unwrap_or_default()
126    }
127
128    /// Shape `text` with full chain fallback at default style (upright,
129    /// regular).
130    pub fn shape(&self, text: &str, size_px: f32) -> Result<Vec<PositionedGlyph>, Error> {
131        self.shape_styled(text, size_px, Style::REGULAR)
132    }
133
134    /// Shape `text` honouring `style` (italic / weight). The shear
135    /// derived from `style` is applied at rasterise-time, not at
136    /// shape-time — so glyph positions / advances stay identical
137    /// regardless of italic. This matches what desktop shapers do
138    /// (synthesised italic doesn't change the metrics).
139    pub fn shape_styled(
140        &self,
141        text: &str,
142        size_px: f32,
143        _style: Style,
144    ) -> Result<Vec<PositionedGlyph>, Error> {
145        if text.is_empty() || size_px <= 0.0 {
146            return Ok(Vec::new());
147        }
148
149        // Step 1: per-codepoint, decide which face owns this glyph.
150        // Result: Vec<(face_idx, glyph_id)>.
151        let assigned = self.assign_codepoints(text)?;
152        if assigned.is_empty() {
153            return Ok(Vec::new());
154        }
155
156        // Step 2: walk runs of consecutive (face_idx) glyphs and shape
157        // each run within the appropriate face. Each run gets its own
158        // GSUB + GPOS pass.
159        let mut out: Vec<PositionedGlyph> = Vec::with_capacity(assigned.len());
160        let mut run_start = 0usize;
161        while run_start < assigned.len() {
162            let face_idx = assigned[run_start].0;
163            let mut run_end = run_start + 1;
164            while run_end < assigned.len() && assigned[run_end].0 == face_idx {
165                run_end += 1;
166            }
167            let gids: Vec<u16> = assigned[run_start..run_end].iter().map(|p| p.1).collect();
168            let face = &self.faces[face_idx as usize];
169            let mut run_glyphs =
170                face.with_font(|font| shape_run_with_font(font, &gids, size_px, face_idx))?;
171            out.append(&mut run_glyphs);
172            run_start = run_end;
173        }
174        Ok(out)
175    }
176
177    /// Per-codepoint face assignment. For every char in `text`, walk
178    /// the chain and pick the first face whose `glyph_index` returns a
179    /// non-zero glyph id. If none does, fall back to face 0 with glyph
180    /// 0 (.notdef) — measurement still works, the user sees tofu.
181    ///
182    /// **Round 7 — Arabic contextual joining.** Before cmap lookup the
183    /// input chars are run through the joining state machine in
184    /// [`crate::shaping::arabic`] and Arabic letters are translated to
185    /// their Presentation Forms-B equivalents (U+FE70..U+FEFF). Forms
186    /// the active face doesn't have a glyph for fall back to the
187    /// original base codepoint — so the worst case is "render in
188    /// isolated form" (the round-6 behaviour), which is the right
189    /// graceful degradation for a font that ships only base glyphs.
190    ///
191    /// **Round 8 — Devanagari cluster reorder.** Devanagari runs are
192    /// segmented into clusters (one base consonant with its halant
193    /// chains, matras, and modifiers) and each cluster is rewritten
194    /// to its visual order: pre-base matras (U+093F) move to the
195    /// front of the cluster so a cmap-only font still draws the
196    /// cluster correctly. Reph identification is performed but the
197    /// actual glyph substitution is gated on the `rphf` GSUB feature
198    /// (followup once `oxideav-ttf` exposes feature-tagged single
199    /// substitution).
200    fn assign_codepoints(&self, text: &str) -> Result<Vec<(u16, u16)>, Error> {
201        let chars: Vec<char> = text.chars().collect();
202        // Three pre-cmap shaping passes: Arabic joining → Indic
203        // cluster reorder → reph GSUB (post-cmap, deferred to after
204        // the codepoint→glyph assignment loop). The scripts are
205        // pairwise disjoint so the order doesn't matter for any one;
206        // we run Arabic first to match the round-7 behaviour exactly
207        // when no Indic input is present.
208        let arabic_shaped = apply_arabic_joining(&chars);
209        let (shaped_chars, reph_marks, cluster_spans) = apply_indic_reorder(&arabic_shaped);
210        let mut out: Vec<(u16, u16)> = Vec::with_capacity(shaped_chars.len());
211        for (orig_idx, ch) in shaped_chars.iter().enumerate() {
212            let mut found: Option<(u16, u16)> = None;
213            for (idx, face) in self.faces.iter().enumerate() {
214                let g = face.with_font(|font| font.glyph_index(*ch))?;
215                match g {
216                    Some(gid) if gid != 0 => {
217                        found = Some((idx as u16, gid));
218                        break;
219                    }
220                    _ => continue,
221                }
222            }
223            // If the substituted presentation-form is missing in every
224            // face, retry with the corresponding original (base)
225            // codepoint — this is the graceful fallback the doc-comment
226            // describes. The Indic pass only reorders (no
227            // substitution), so the original char is already present
228            // somewhere in `chars`; the Arabic pass substitutes 1:1
229            // so `chars[orig_idx]` is the right base when the
230            // permutation is identity. Use position equality where
231            // possible, fall back to char-value lookup otherwise.
232            if found.is_none() {
233                let orig = if orig_idx < chars.len() && *ch != chars[orig_idx] {
234                    Some(chars[orig_idx])
235                } else if !chars.contains(ch) {
236                    None
237                } else {
238                    // The shaped char is present in the original input
239                    // — no substitution happened, so the retry is a
240                    // no-op (the same lookup we already did failed).
241                    None
242                };
243                if let Some(orig) = orig {
244                    for (idx, face) in self.faces.iter().enumerate() {
245                        let g = face.with_font(|font| font.glyph_index(orig))?;
246                        if let Some(gid) = g {
247                            if gid != 0 {
248                                found = Some((idx as u16, gid));
249                                break;
250                            }
251                        }
252                    }
253                }
254            }
255            // No face had it — render as primary's .notdef.
256            out.push(found.unwrap_or((0, 0)));
257        }
258        // Apply reph GSUB substitution: for each `RephMark` we identified
259        // in the pre-cmap pass, look up the `rphf` feature on the
260        // *assigned* face for the RA glyph, apply LookupType 1, and if
261        // a substitute is returned, rewrite the RA gid + drop the halant
262        // by replacing the halant slot with the same gid as the
263        // following base consonant — i.e. the cluster collapses
264        // [reph_gid, halant, base, ...] → [reph_gid, base, base, ...]
265        // and the duplicate base is removed below. Marks for which the
266        // active face publishes no `rphf` lookup are silently skipped
267        // (the cluster falls back to the in-line RA + halant + base
268        // rendering, which is the round-8 behaviour).
269        //
270        // We process marks back-to-front so the index manipulation
271        // stays straightforward (no shifting of pending marks).
272        let (out, dropped_halants) = self.apply_reph_substitutions(out, &reph_marks)?;
273        // Round 11 — cluster-position-aware GSUB pass. For every Indic
274        // cluster, dispatch `half` / `pref` / `blwf` / `abvf` / `pstf`
275        // (per-position substitution) on halant-suffixed conjunct
276        // components, then `pres` / `psts` / `abvs` / `blws` (cluster-
277        // wide presentation features) on every glyph in the cluster.
278        // Coverage misses pass through unchanged so a font without a
279        // given lookup degrades gracefully (the round-10 behaviour).
280        //
281        // The reph pass may have removed halant glyphs from `out`; the
282        // `dropped_halants` Vec tells us which post-reorder character
283        // indices are now absent so we can shift cluster span ends
284        // accordingly. (Reph removal touches the END of a cluster's
285        // first 3 chars, never its boundary, so the START index is
286        // always still valid.)
287        let adjusted_spans = adjust_cluster_spans(&cluster_spans, &dropped_halants, &shaped_chars);
288        let out = self.apply_cluster_position_substitutions(out, &adjusted_spans, &shaped_chars)?;
289        Ok(out)
290    }
291
292    /// For each `RephMark`, apply the active face's `rphf` GSUB lookup
293    /// to the RA glyph and drop the halant glyph if a substitute is
294    /// returned. Marks for which no `rphf` lookup applies pass through
295    /// unchanged.
296    ///
297    /// Returns the rewritten glyph list AND a list of the post-reorder
298    /// character indices whose corresponding halant glyph was removed
299    /// from the run. The cluster-position GSUB pass downstream uses
300    /// this to shift cluster span end indices.
301    fn apply_reph_substitutions(
302        &self,
303        glyphs: Vec<(u16, u16)>,
304        marks: &[RephMark],
305    ) -> Result<RephSubstResult, Error> {
306        if marks.is_empty() {
307            return Ok((glyphs, Vec::new()));
308        }
309        // Process marks back-to-front so earlier RA / halant indices
310        // stay stable while we splice out the halant slots.
311        let mut out = glyphs;
312        let mut dropped: Vec<usize> = Vec::new();
313        for mark in marks.iter().rev() {
314            // Bounds + face-coverage sanity. The reph mark records the
315            // RA's index into the post-reorder character stream which
316            // matches `out` 1:1.
317            if mark.ra_idx >= out.len() || mark.halant_idx >= out.len() {
318                continue;
319            }
320            let (ra_face_idx, ra_gid) = out[mark.ra_idx];
321            // The reph substitution only fires when the RA glyph
322            // actually came from an in-chain face (not .notdef on face
323            // 0 because no face had it).
324            if ra_gid == 0 {
325                continue;
326            }
327            let face = &self.faces[ra_face_idx as usize];
328            // Look up the script's `rphf` feature on this face. We try
329            // the modern Indic2 tag first (`dev2` / `bng2`), then the
330            // legacy v1 tag (`deva` / `beng`) for older fonts.
331            let (modern, legacy) = match script_indic_tags(mark.script) {
332                Some(p) => p,
333                None => continue,
334            };
335            let new_ra = face.with_font(|font| {
336                let mut substitute: Option<u16> = None;
337                for tag in [modern, legacy] {
338                    let features = font.gsub_features_for_script(tag, None);
339                    for feat in features {
340                        if feat.tag == *b"rphf" {
341                            for &lookup_idx in &feat.lookup_indices {
342                                if let Some(g) = font.gsub_apply_lookup_type_1(lookup_idx, ra_gid) {
343                                    substitute = Some(g);
344                                    break;
345                                }
346                            }
347                            if substitute.is_some() {
348                                break;
349                            }
350                        }
351                    }
352                    if substitute.is_some() {
353                        break;
354                    }
355                }
356                substitute
357            })?;
358            if let Some(reph_gid) = new_ra {
359                // Rewrite the RA glyph to its reph form.
360                out[mark.ra_idx] = (ra_face_idx, reph_gid);
361                // Drop the halant glyph (it's redundant once the reph
362                // is in place — the visual reph mark stands in for the
363                // RA + halant pair). We splice it out of the assigned
364                // glyph list so the downstream shaper sees the
365                // collapsed cluster.
366                if mark.halant_idx < out.len() {
367                    out.remove(mark.halant_idx);
368                    dropped.push(mark.halant_idx);
369                }
370            }
371        }
372        Ok((out, dropped))
373    }
374
375    /// Round-11 cluster-position-aware GSUB pass. For every Indic
376    /// cluster span, dispatch the position-driven GSUB features:
377    ///
378    /// - **`half`** — applied to a base consonant immediately followed
379    ///   by a halant when the cluster has more characters after the
380    ///   halant (i.e. the consonant is in the *non-final* slot of a
381    ///   conjunct — its inherent vowel is suppressed and a "half form"
382    ///   glyph is the canonical shape).
383    /// - **`pref` / `blwf` / `abvf` / `pstf`** — applied to a consonant
384    ///   that follows a halant. The Telugu/Kannada/Malayalam family
385    ///   distinguishes the position by glyph; we try `pref` first,
386    ///   then `blwf`, `abvf`, `pstf` in that order. The first lookup
387    ///   that returns a substitute wins. (Coverage misses pass through
388    ///   unchanged — the ordering picks the position the font has a
389    ///   form for.)
390    /// - **`pres` / `psts` / `abvs` / `blws`** — presentation-pass
391    ///   single substitutions; applied to every glyph in the cluster
392    ///   (each glyph independently — coverage misses pass through).
393    ///
394    /// Faces without any of the above lookups for the cluster's script
395    /// degrade to the round-10 behaviour (just `rphf`).
396    fn apply_cluster_position_substitutions(
397        &self,
398        glyphs: Vec<(u16, u16)>,
399        spans: &[ClusterSpan],
400        chars: &[char],
401    ) -> Result<Vec<(u16, u16)>, Error> {
402        if spans.is_empty() {
403            return Ok(glyphs);
404        }
405        let mut out = glyphs;
406        for span in spans {
407            // Bound the span to the current `out` length — reph drops
408            // may have shifted the end down; downstream
409            // `adjust_cluster_spans` clamps but be defensive.
410            let end = span.end.min(out.len());
411            if span.start >= end {
412                continue;
413            }
414            // Resolve the script's GSUB script tag pair.
415            let (modern, legacy) = match script_indic_tags(span.script) {
416                Some(p) => p,
417                None => continue,
418            };
419            // Per-position substitution for halant-suffixed conjunct
420            // components. Walk the cluster's chars (post-reorder) and
421            // identify (a) base + halant pairs (`half`) and (b)
422            // post-halant consonants (`pref` / `blwf` / `abvf` /
423            // `pstf`). The chars vec is bounded by the pre-reph
424            // cluster's char positions; reph drops shift the END down
425            // by 1 per drop but not the START — clamp to chars.len().
426            let chars_end = span.end.min(chars.len());
427            let cat_of =
428                |i: usize| -> IndicCategory { indic_category_for_script(span.script, chars[i]) };
429            let mut pos = span.start;
430            while pos < chars_end {
431                let here = cat_of(pos);
432                if here == IndicCategory::Consonant && pos + 1 < chars_end {
433                    let next = cat_of(pos + 1);
434                    if next == IndicCategory::Halant {
435                        let glyph_idx = pos.min(out.len().saturating_sub(1));
436                        let after_halant = pos + 2;
437                        // `half` form fires when there's anything after
438                        // the halant in the cluster.
439                        if after_halant < chars_end && glyph_idx < out.len() {
440                            self.try_apply_single_subst(
441                                &mut out, glyph_idx, modern, legacy, b"half",
442                            )?;
443                        }
444                        // The post-halant consonant (if any) gets the
445                        // pref / blwf / abvf / pstf cascade. We pick
446                        // the first feature whose lookup covers the
447                        // gid — the font's form-position table dictates
448                        // which one wins.
449                        if after_halant < chars_end
450                            && cat_of(after_halant) == IndicCategory::Consonant
451                            && after_halant < out.len()
452                        {
453                            for tag in [b"pref", b"blwf", b"abvf", b"pstf"] {
454                                if self.try_apply_single_subst(
455                                    &mut out,
456                                    after_halant,
457                                    modern,
458                                    legacy,
459                                    tag,
460                                )? {
461                                    break;
462                                }
463                            }
464                        }
465                    }
466                }
467                pos += 1;
468            }
469            // Presentation-pass single substitutions over every glyph
470            // in the cluster.
471            for tag in [b"pres", b"psts", b"abvs", b"blws"] {
472                for idx in span.start..end {
473                    if idx < out.len() {
474                        self.try_apply_single_subst(&mut out, idx, modern, legacy, tag)?;
475                    }
476                }
477            }
478        }
479        Ok(out)
480    }
481
482    /// Attempt to apply a feature-tagged single substitution
483    /// (LookupType 1) to `out[glyph_idx]`. Walks the modern Indic2 tag
484    /// first then the legacy v1 tag; returns `Ok(true)` when a
485    /// substitution was applied, `Ok(false)` otherwise. Glyphs whose
486    /// owning face publishes no matching feature pass through silently.
487    fn try_apply_single_subst(
488        &self,
489        out: &mut [(u16, u16)],
490        glyph_idx: usize,
491        modern: [u8; 4],
492        legacy: [u8; 4],
493        feature_tag: &[u8; 4],
494    ) -> Result<bool, Error> {
495        let (face_idx, gid) = out[glyph_idx];
496        if gid == 0 {
497            return Ok(false);
498        }
499        let face = &self.faces[face_idx as usize];
500        let new_gid = face.with_font(|font| {
501            for tag in [modern, legacy] {
502                let features = font.gsub_features_for_script(tag, None);
503                for feat in features {
504                    if &feat.tag == feature_tag {
505                        for &lookup_idx in &feat.lookup_indices {
506                            if let Some(g) = font.gsub_apply_lookup_type_1(lookup_idx, gid) {
507                                return Some(g);
508                            }
509                        }
510                    }
511                }
512            }
513            None
514        })?;
515        if let Some(g) = new_gid {
516            out[glyph_idx] = (face_idx, g);
517            Ok(true)
518        } else {
519            Ok(false)
520        }
521    }
522}
523
/// Return type of [`FaceChain::apply_reph_substitutions`]: the
/// rewritten `(face_idx, glyph_id)` list plus the post-reorder
/// character indices whose halant glyph was removed. The indices are
/// the `halant_idx` values of the marks that fired, i.e. positions in
/// the stream as it was BEFORE any removal.
type RephSubstResult = (Vec<(u16, u16)>, Vec<usize>);
528
/// Sidecar info recorded by [`apply_indic_reorder`] for every cluster
/// whose `ClusterFlags::has_reph` was set. Carries the indices into
/// the post-reorder character stream of the leading RA glyph and the
/// halant immediately after it, plus the originating script.
///
/// Consumed by [`FaceChain::apply_reph_substitutions`] which looks up
/// the `rphf` GSUB feature on the face that owns the RA glyph and, if
/// a substitute is returned, rewrites the RA glyph and removes the
/// halant glyph.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct RephMark {
    /// Index of the RA character in the post-reorder stream.
    ra_idx: usize,
    /// Index of the halant character in the post-reorder stream.
    /// [`apply_indic_reorder`] always records `ra_idx + 1`; it is
    /// stored explicitly so the removal step does not have to
    /// re-derive it.
    halant_idx: usize,
    /// Script the cluster originated from. Drives the OpenType script
    /// tag pair (`dev2` / `deva` etc.) for the GSUB lookup.
    script: Script,
}
549
/// Per-cluster span recorded by [`apply_indic_reorder`] so the post-cmap
/// cluster-position-aware GSUB pass knows which glyphs belong to which
/// Indic cluster + what script they came from.
///
/// Consumed by [`FaceChain::apply_cluster_position_substitutions`]
/// which dispatches `half` / `pref` / `blwf` / `abvf` / `pstf` for
/// halant-suffixed conjunct components, and `pres` / `psts` / `abvs` /
/// `blws` as presentation-pass single substitutions for every glyph in
/// the cluster.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ClusterSpan {
    /// Inclusive start index. As recorded by [`apply_indic_reorder`]
    /// this is a position in the post-reorder character stream;
    /// [`adjust_cluster_spans`] produces copies shifted to account for
    /// removed halant glyphs.
    start: usize,
    /// Exclusive end index, in the same coordinates as `start`.
    end: usize,
    /// Script the cluster originated from. Drives the OpenType script
    /// tag pair (`dev2` / `deva` etc.) for the GSUB lookup.
    script: Script,
}
570
571/// Pre-cmap Indic shaping pass: walk `chars`, find contiguous runs of
572/// Indic codepoints (any script we have rules for), segment each run
573/// into orthographic clusters, and apply per-script
574/// [`reorder_cluster_with`] to each (pre-base matra reorder + reph
575/// flagging). Non-Indic characters pass through untouched.
576///
577/// Returns the reordered char stream plus a list of [`RephMark`]
578/// sidecar entries (one per cluster with `ClusterFlags::has_reph`)
579/// PLUS a list of [`ClusterSpan`] entries (one per Indic cluster) so
580/// the cluster-position-aware GSUB pass downstream can dispatch
581/// per-position lookups.
582///
583/// Indices in the returned [`RephMark`]s and [`ClusterSpan`]s are into
584/// the returned character stream (the post-reorder one).
585fn apply_indic_reorder(chars: &[char]) -> (Vec<char>, Vec<RephMark>, Vec<ClusterSpan>) {
586    if chars.is_empty() {
587        return (Vec::new(), Vec::new(), Vec::new());
588    }
589    let mut out: Vec<char> = Vec::with_capacity(chars.len());
590    let mut reph_marks: Vec<RephMark> = Vec::new();
591    let mut spans: Vec<ClusterSpan> = Vec::new();
592    let mut i = 0;
593    while i < chars.len() {
594        // Walk a maximal Indic-of-one-script run starting at `i`.
595        let run_script = script_of(chars[i]);
596        let rules = match indic_rules_for_script(run_script) {
597            Some(r) => r,
598            None => {
599                // Non-Indic code point — pass through.
600                out.push(chars[i]);
601                i += 1;
602                continue;
603            }
604        };
605        let run_start = i;
606        while i < chars.len() && script_of(chars[i]) == run_script {
607            i += 1;
608        }
609        let run = &chars[run_start..i];
610        // Cluster this run + reorder each cluster.
611        let bounds = cluster_boundaries_with(run, rules.category);
612        for (s, e) in bounds {
613            let cluster = &run[s..e];
614            let (reordered, flags) = reorder_cluster_with(cluster, rules);
615            let cluster_start = out.len();
616            // Record the reph mark BEFORE we extend `out` with the
617            // reordered cluster — `out.len()` at this point is the
618            // index of the RA glyph in the post-reorder stream.
619            if flags.has_reph {
620                // The reph rule guarantees the leading RA + halant +
621                // consonant sit at positions 0 + 1 + 2 of the cluster
622                // (pre-base matras don't reorder past the leading
623                // characters; the matra moves to position 0 only when
624                // the matra is itself in the cluster — but the reph
625                // detection in `reorder_cluster_with` checks the
626                // ORIGINAL cluster, not the reordered one). To keep
627                // the indexing unambiguous, assert that the cluster's
628                // post-reorder layout has RA + halant at the cluster
629                // start when flags.has_reph is set AND no pre-base
630                // matra was reordered. When a pre-base matra DID
631                // reorder, the RA + halant sit at positions 1 + 2
632                // (after the matra at position 0).
633                let ra_offset = if flags.pre_base_reordered { 1 } else { 0 };
634                let ra_idx = cluster_start + ra_offset;
635                let halant_idx = ra_idx + 1;
636                reph_marks.push(RephMark {
637                    ra_idx,
638                    halant_idx,
639                    script: run_script,
640                });
641            }
642            out.extend_from_slice(&reordered);
643            // Record the cluster span (inclusive..exclusive) for the
644            // cluster-position-aware GSUB pass downstream.
645            spans.push(ClusterSpan {
646                start: cluster_start,
647                end: out.len(),
648                script: run_script,
649            });
650        }
651    }
652    (out, reph_marks, spans)
653}
654
655/// Shift a list of [`ClusterSpan`]s after the reph pass dropped some
656/// halant glyphs. Reph drop occurs at index `halant_idx` in the
657/// post-reorder stream which corresponds to the SECOND character of a
658/// reph cluster (positions 1 / 2 of the cluster, depending on whether a
659/// pre-base matra reordered). The drop:
660/// - shifts the END index of the affected span down by 1 (one fewer
661///   glyph in this cluster);
662/// - shifts the START + END indices of every subsequent span down by 1.
663///
664/// The cluster char positions in the chars vec are unchanged — only
665/// the GLYPH indices in `out` shift. We track this by computing per-
666/// span how many drops happened at-or-before its start (shift_start)
667/// and at-or-before its end-1 (shift_end).
668fn adjust_cluster_spans(
669    spans: &[ClusterSpan],
670    dropped: &[usize],
671    chars: &[char],
672) -> Vec<ClusterSpan> {
673    if dropped.is_empty() {
674        return spans.to_vec();
675    }
676    // Count drops strictly before `idx`.
677    let drops_before = |idx: usize| -> usize { dropped.iter().filter(|&&d| d < idx).count() };
678    spans
679        .iter()
680        .map(|s| {
681            let new_start = s.start.saturating_sub(drops_before(s.start));
682            let new_end = s.end.saturating_sub(drops_before(s.end));
683            ClusterSpan {
684                start: new_start.min(chars.len()),
685                end: new_end.min(chars.len()),
686                script: s.script,
687            }
688        })
689        .collect()
690}
691
692/// Look up a script-specific [`IndicCategory`] for `ch`. Used by the
693/// cluster-position GSUB pass to identify halant chains within a
694/// cluster span. Returns [`IndicCategory::Other`] for non-Indic scripts.
695fn indic_category_for_script(script: Script, ch: char) -> IndicCategory {
696    use crate::shaping::indic;
697    match script {
698        Script::Devanagari => indic::devanagari_category(ch),
699        Script::Bengali => indic::bengali_category(ch),
700        Script::Tamil => indic::tamil_category(ch),
701        Script::Gurmukhi => indic::gurmukhi_category(ch),
702        Script::Gujarati => indic::gujarati_category(ch),
703        Script::Telugu => indic::telugu_category(ch),
704        Script::Kannada => indic::kannada_category(ch),
705        Script::Malayalam => indic::malayalam_category(ch),
706        Script::Oriya => indic::oriya_category(ch),
707        Script::Sinhala => indic::sinhala_category(ch),
708        Script::Khmer => indic::khmer_category(ch),
709        Script::Thai => indic::thai_category(ch),
710        _ => IndicCategory::Other,
711    }
712}
713
714/// Map a [`Script`] to its Indic [`ReorderRules`], if any. Used by
715/// the per-codepoint Indic dispatch in [`apply_indic_reorder`].
716fn indic_rules_for_script(script: Script) -> Option<&'static ReorderRules> {
717    match script {
718        Script::Devanagari => Some(&DEVANAGARI_RULES),
719        Script::Bengali => Some(&BENGALI_RULES),
720        Script::Tamil => Some(&TAMIL_RULES),
721        Script::Gurmukhi => Some(&GURMUKHI_RULES),
722        Script::Gujarati => Some(&GUJARATI_RULES),
723        Script::Telugu => Some(&TELUGU_RULES),
724        Script::Kannada => Some(&KANNADA_RULES),
725        Script::Malayalam => Some(&MALAYALAM_RULES),
726        Script::Oriya => Some(&ORIYA_RULES),
727        Script::Sinhala => Some(&SINHALA_RULES),
728        Script::Khmer => Some(&KHMER_RULES),
729        Script::Thai => Some(&THAI_RULES),
730        _ => None,
731    }
732}
733
734/// Pre-cmap Arabic shaping pass: walk `chars`, find contiguous runs of
735/// Arabic codepoints, run the joining state machine on each run, and
736/// translate joining-aware base letters to their Arabic Presentation
737/// Forms-B equivalents. Non-Arabic codepoints pass through untouched.
738///
739/// This sits *before* face-chain cmap lookup so a font that only
740/// supports the FE70..FEFF block (most desktop fonts) still gets
741/// visually-correct contextual shapes. Faces that lack the
742/// presentation-form glyph fall back via the retry path in
743/// [`FaceChain::assign_codepoints`].
744fn apply_arabic_joining(chars: &[char]) -> Vec<char> {
745    if chars.is_empty() {
746        return Vec::new();
747    }
748    let mut out: Vec<char> = Vec::with_capacity(chars.len());
749    let mut i = 0;
750    while i < chars.len() {
751        // Walk a maximal Arabic-only run starting at `i`.
752        let run_start = i;
753        while i < chars.len() && script_of(chars[i]) == Script::Arabic {
754            i += 1;
755        }
756        if i > run_start {
757            let run = &chars[run_start..i];
758            let forms = compute_forms(run);
759            for (k, &ch) in run.iter().enumerate() {
760                let translated = presentation_form(ch, forms[k]).unwrap_or(ch);
761                out.push(translated);
762            }
763        }
764        // Pass non-Arabic chars through unchanged.
765        if i < chars.len() && script_of(chars[i]) != Script::Arabic {
766            out.push(chars[i]);
767            i += 1;
768        }
769    }
770    out
771}
772
/// Unit tests for the pre-cmap passes in this file: Arabic joining
/// ([`apply_arabic_joining`]), Indic cluster reordering
/// ([`apply_indic_reorder`]) and the post-reph span bookkeeping
/// ([`adjust_cluster_spans`]). Everything here works on bare `char`
/// slices, so no font data is required.
#[cfg(test)]
#[allow(non_snake_case)] // tests reference Unicode codepoint literals + algorithm shorthands
mod tests {
    use super::{adjust_cluster_spans, apply_arabic_joining, apply_indic_reorder, ClusterSpan};
    use crate::shaping::arabic::Script;

    #[test]
    fn ascii_passes_through_unchanged() {
        let chars: Vec<char> = "Hello".chars().collect();
        assert_eq!(apply_arabic_joining(&chars), chars);
    }

    #[test]
    fn empty_input_yields_empty_arabic_joining_output() {
        // Exercises the empty-input early return.
        assert!(apply_arabic_joining(&[]).is_empty());
    }

    #[test]
    fn devanagari_pre_base_matra_moves_to_front_of_cluster() {
        // "कि" = KA + sign-i → sign-i + KA after Devanagari reorder.
        let chars = vec!['\u{0915}', '\u{093F}'];
        let (out, marks, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{093F}', '\u{0915}']);
        assert!(marks.is_empty(), "no reph in this cluster");
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].start, 0);
        assert_eq!(spans[0].end, 2);
        assert_eq!(spans[0].script, Script::Devanagari);
    }

    #[test]
    fn devanagari_two_clusters_each_reorder_independently() {
        // "किकि" → two clusters; each reorders its matra to the front.
        let chars = vec!['\u{0915}', '\u{093F}', '\u{0915}', '\u{093F}'];
        let (out, _, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{093F}', '\u{0915}', '\u{093F}', '\u{0915}']);
        assert_eq!(spans.len(), 2);
        assert_eq!((spans[0].start, spans[0].end), (0, 2));
        assert_eq!((spans[1].start, spans[1].end), (2, 4));
    }

    #[test]
    fn devanagari_conjunct_reorder_keeps_halant_chain_intact() {
        // "क्षि" = KA + halant + SSA + sign-i. Conjunct stays in
        // logical order; matra moves to front.
        let chars = vec!['\u{0915}', '\u{094D}', '\u{0937}', '\u{093F}'];
        let (out, _, _) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{093F}', '\u{0915}', '\u{094D}', '\u{0937}']);
    }

    #[test]
    fn ascii_passes_through_indic_reorder_unchanged() {
        // Sanity: non-Indic input must not be touched.
        let chars: Vec<char> = "Hello".chars().collect();
        let (out, marks, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, chars);
        assert!(marks.is_empty());
        // Non-Indic chars produce no cluster spans.
        assert!(spans.is_empty());
    }

    #[test]
    fn mixed_latin_and_devanagari_reorders_only_devanagari_clusters() {
        // "Aकि" → Latin A passes through; Devanagari cluster reorders.
        let chars = vec!['A', '\u{0915}', '\u{093F}'];
        let (out, _, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['A', '\u{093F}', '\u{0915}']);
        // Only the Devanagari cluster gets a span.
        assert_eq!(spans.len(), 1);
        assert_eq!((spans[0].start, spans[0].end), (1, 3));
    }

    #[test]
    fn devanagari_reph_emits_reph_mark_at_correct_index() {
        // RA + halant + KA → reph cluster. The mark records ra_idx=0
        // and halant_idx=1 (no pre-base matra reorder shifted them).
        let chars = vec!['\u{0930}', '\u{094D}', '\u{0915}'];
        let (out, marks, _) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{0930}', '\u{094D}', '\u{0915}']);
        assert_eq!(marks.len(), 1);
        assert_eq!(marks[0].ra_idx, 0);
        assert_eq!(marks[0].halant_idx, 1);
        assert_eq!(marks[0].script, Script::Devanagari);
    }

    #[test]
    fn devanagari_reph_with_pre_base_matra_shifts_reph_mark_by_one() {
        // RA + halant + KA + sign-i — matra moves to position 0; RA
        // is now at position 1, halant at 2.
        let chars = vec!['\u{0930}', '\u{094D}', '\u{0915}', '\u{093F}'];
        let (out, marks, _) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{093F}', '\u{0930}', '\u{094D}', '\u{0915}']);
        assert_eq!(marks.len(), 1);
        assert_eq!(marks[0].ra_idx, 1);
        assert_eq!(marks[0].halant_idx, 2);
    }

    #[test]
    fn bengali_pre_base_matra_e_moves_to_front_of_cluster() {
        // BENGALI KA + sign-e → sign-e + KA.
        let chars = vec!['\u{0995}', '\u{09C7}'];
        let (out, marks, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{09C7}', '\u{0995}']);
        assert!(marks.is_empty());
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].script, Script::Bengali);
    }

    #[test]
    fn bengali_reph_emits_reph_mark_with_bengali_script_tag() {
        // BENGALI RA + halant + KA → reph cluster.
        let chars = vec!['\u{09B0}', '\u{09CD}', '\u{0995}'];
        let (out, marks, _) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{09B0}', '\u{09CD}', '\u{0995}']);
        assert_eq!(marks.len(), 1);
        assert_eq!(marks[0].script, Script::Bengali);
    }

    #[test]
    fn tamil_pre_base_matra_e_moves_to_front_of_cluster() {
        // TAMIL KA + sign-e → sign-e + KA.
        let chars = vec!['\u{0B95}', '\u{0BC6}'];
        let (out, marks, _) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{0BC6}', '\u{0B95}']);
        assert!(marks.is_empty());
    }

    #[test]
    fn tamil_RA_plus_halant_does_NOT_emit_reph_mark() {
        // TAMIL RA + pulli + KA — Tamil never forms a reph.
        let chars = vec!['\u{0BB0}', '\u{0BCD}', '\u{0B95}'];
        let (_out, marks, _) = apply_indic_reorder(&chars);
        assert!(marks.is_empty(), "Tamil never sets the reph flag");
    }

    #[test]
    fn mixed_devanagari_and_bengali_runs_segment_independently() {
        // Devanagari KA + sign-i + Bengali KA + sign-i.
        let chars = vec!['\u{0915}', '\u{093F}', '\u{0995}', '\u{09BF}'];
        let (out, _, spans) = apply_indic_reorder(&chars);
        // Each script's pre-base matra moves to the front of its OWN
        // cluster (cluster boundary at the script switch).
        assert_eq!(out, vec!['\u{093F}', '\u{0915}', '\u{09BF}', '\u{0995}']);
        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0].script, Script::Devanagari);
        assert_eq!(spans[1].script, Script::Bengali);
    }

    // ---------- Round 11 — new scripts ----------

    #[test]
    fn gurmukhi_cluster_reorder_emits_span_with_gurmukhi_script() {
        // KA + sign-i — pre-base matra reorders.
        let chars = vec!['\u{0A15}', '\u{0A3F}'];
        let (out, _, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{0A3F}', '\u{0A15}']);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].script, Script::Gurmukhi);
    }

    #[test]
    fn gujarati_cluster_reorder_emits_span_with_gujarati_script() {
        // KA + sign-i.
        let chars = vec!['\u{0A95}', '\u{0ABF}'];
        let (out, _, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{0ABF}', '\u{0A95}']);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].script, Script::Gujarati);
    }

    #[test]
    fn telugu_pre_base_matra_e_reorders_with_telugu_span() {
        // KA + sign-e (pre-base) — reorder.
        let chars = vec!['\u{0C15}', '\u{0C46}'];
        let (out, _, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{0C46}', '\u{0C15}']);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].script, Script::Telugu);
    }

    #[test]
    fn kannada_reph_emits_reph_mark_with_kannada_script_tag() {
        // KANNADA RA U+0CB0 + halant U+0CCD + KA U+0C95 → reph cluster.
        let chars = vec!['\u{0CB0}', '\u{0CCD}', '\u{0C95}'];
        let (_out, marks, spans) = apply_indic_reorder(&chars);
        assert_eq!(marks.len(), 1);
        assert_eq!(marks[0].script, Script::Kannada);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].script, Script::Kannada);
    }

    #[test]
    fn malayalam_RA_plus_halant_does_NOT_emit_reph_mark() {
        // Modern Malayalam — chillu replaces reph.
        let chars = vec!['\u{0D30}', '\u{0D4D}', '\u{0D15}'];
        let (_out, marks, spans) = apply_indic_reorder(&chars);
        assert!(marks.is_empty());
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].script, Script::Malayalam);
    }

    #[test]
    fn oriya_pre_base_matra_e_reorders_with_oriya_span() {
        // ORIYA KA U+0B15 + sign-e U+0B47 → sign-e + KA.
        let chars = vec!['\u{0B15}', '\u{0B47}'];
        let (out, _, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{0B47}', '\u{0B15}']);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].script, Script::Oriya);
    }

    #[test]
    fn malayalam_chillu_starts_new_cluster_from_following_consonant() {
        // Chillu U+0D7A + KA U+0D15 — chillu is a Consonant, the next
        // consonant starts a new cluster.
        let chars = vec!['\u{0D7A}', '\u{0D15}'];
        let (_out, _marks, spans) = apply_indic_reorder(&chars);
        assert_eq!(spans.len(), 2);
    }

    // ---------- Round 12 (Brahmic non-Indic) ----------

    #[test]
    fn sinhala_pre_base_matra_reorders_with_sinhala_span() {
        // Sinhala KA U+0D9A + sign-e U+0DD9 → sign-e + KA.
        let chars = vec!['\u{0D9A}', '\u{0DD9}'];
        let (out, marks, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{0DD9}', '\u{0D9A}']);
        assert!(marks.is_empty(), "Sinhala has no reph");
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].script, Script::Sinhala);
    }

    #[test]
    fn sinhala_RA_plus_al_lakuna_does_NOT_emit_reph_mark() {
        // Sinhala has no superscript reph rendering.
        let chars = vec!['\u{0DBB}', '\u{0DCA}', '\u{0D9A}'];
        let (_out, marks, spans) = apply_indic_reorder(&chars);
        assert!(marks.is_empty());
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].script, Script::Sinhala);
    }

    #[test]
    fn khmer_pre_base_matra_reorders_with_khmer_span() {
        // Khmer KA U+1780 + sign-e U+17C1 → sign-e + KA.
        let chars = vec!['\u{1780}', '\u{17C1}'];
        let (out, _marks, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, vec!['\u{17C1}', '\u{1780}']);
        assert_eq!(spans.len(), 1);
        assert_eq!(spans[0].script, Script::Khmer);
    }

    #[test]
    fn khmer_coeng_keeps_subjoined_chain_in_one_cluster_span() {
        // KA + COENG + KHA + COENG + GA — three-deep subjoined chain.
        let chars = vec!['\u{1780}', '\u{17D2}', '\u{1781}', '\u{17D2}', '\u{1782}'];
        let (out, marks, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, chars); // no reorder (no pre-base matra)
        assert!(marks.is_empty());
        assert_eq!(spans.len(), 1);
        assert_eq!((spans[0].start, spans[0].end), (0, 5));
        assert_eq!(spans[0].script, Script::Khmer);
    }

    #[test]
    fn thai_no_reorder_preserves_storage_order() {
        // Thai SARA E (pre-base in storage) + KO KAI — already in
        // visual order; cluster machine starts a new cluster at each.
        let chars = vec!['\u{0E40}', '\u{0E01}'];
        let (out, marks, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, chars);
        assert!(marks.is_empty());
        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0].script, Script::Thai);
        assert_eq!(spans[1].script, Script::Thai);
    }

    #[test]
    fn thai_consonant_with_above_vowel_and_tone_emits_one_span() {
        // KO KAI + SARA I (above) + MAI THO (tone) — single cluster.
        let chars = vec!['\u{0E01}', '\u{0E34}', '\u{0E49}'];
        let (out, marks, spans) = apply_indic_reorder(&chars);
        assert_eq!(out, chars);
        assert!(marks.is_empty());
        assert_eq!(spans.len(), 1);
        assert_eq!((spans[0].start, spans[0].end), (0, 3));
        assert_eq!(spans[0].script, Script::Thai);
    }

    #[test]
    fn mixed_devanagari_and_thai_segments_at_script_boundary() {
        // Devanagari KA + Thai KO KAI — different scripts, two
        // independent clusters.
        let chars = vec!['\u{0915}', '\u{0E01}'];
        let (_out, marks, spans) = apply_indic_reorder(&chars);
        assert!(marks.is_empty());
        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0].script, Script::Devanagari);
        assert_eq!(spans[1].script, Script::Thai);
    }

    // ---------- Cluster-span adjustment after reph drops ----------

    #[test]
    fn adjust_cluster_spans_shifts_subsequent_spans_after_drop() {
        let chars = vec!['a'; 10];
        let spans = vec![
            ClusterSpan {
                start: 0,
                end: 3,
                script: Script::Devanagari,
            },
            ClusterSpan {
                start: 3,
                end: 6,
                script: Script::Devanagari,
            },
        ];
        // Pretend reph dropped the halant at index 1 (in cluster 0).
        let dropped = vec![1usize];
        let adjusted = adjust_cluster_spans(&spans, &dropped, &chars);
        // Cluster 0 shrinks by 1 at the end.
        assert_eq!((adjusted[0].start, adjusted[0].end), (0, 2));
        // Cluster 1 shifts both start and end down by 1.
        assert_eq!((adjusted[1].start, adjusted[1].end), (2, 5));
    }

    #[test]
    fn adjust_cluster_spans_without_drops_returns_spans_unchanged() {
        // Exercises the `dropped.is_empty()` early return.
        let chars = vec!['a'; 10];
        let spans = vec![
            ClusterSpan {
                start: 0,
                end: 3,
                script: Script::Devanagari,
            },
            ClusterSpan {
                start: 3,
                end: 6,
                script: Script::Bengali,
            },
        ];
        let adjusted = adjust_cluster_spans(&spans, &[], &chars);
        assert_eq!(adjusted.len(), 2);
        assert_eq!((adjusted[0].start, adjusted[0].end), (0, 3));
        assert_eq!((adjusted[1].start, adjusted[1].end), (3, 6));
        assert_eq!(adjusted[0].script, Script::Devanagari);
        assert_eq!(adjusted[1].script, Script::Bengali);
    }

    #[test]
    fn adjust_cluster_spans_drop_in_later_cluster_leaves_earlier_span_alone() {
        let chars = vec!['a'; 10];
        let spans = vec![
            ClusterSpan {
                start: 0,
                end: 3,
                script: Script::Devanagari,
            },
            ClusterSpan {
                start: 3,
                end: 6,
                script: Script::Devanagari,
            },
        ];
        // Drop at index 4 lies inside cluster 1 only.
        let adjusted = adjust_cluster_spans(&spans, &[4], &chars);
        // Cluster 0 is entirely before the drop: untouched.
        assert_eq!((adjusted[0].start, adjusted[0].end), (0, 3));
        // Cluster 1 keeps its start and loses one glyph at the end.
        assert_eq!((adjusted[1].start, adjusted[1].end), (3, 5));
    }

    // ---------- Arabic joining ----------

    #[test]
    fn arabic_run_translates_to_presentation_forms() {
        // BEH BEH BEH → Init Medi Fina presentation forms.
        // 0x0628 BEH → init 0xFE91, medi 0xFE92, fina 0xFE90.
        let chars = vec!['\u{0628}', '\u{0628}', '\u{0628}'];
        let out = apply_arabic_joining(&chars);
        assert_eq!(out, vec!['\u{FE91}', '\u{FE92}', '\u{FE90}']);
    }

    #[test]
    fn arabic_run_with_ascii_separator() {
        // BEH BEH SPACE BEH BEH → first BEHs become Init/Fina, space
        // unchanged, second BEHs become Init/Fina.
        let chars = vec!['\u{0628}', '\u{0628}', ' ', '\u{0628}', '\u{0628}'];
        let out = apply_arabic_joining(&chars);
        assert_eq!(
            out,
            vec!['\u{FE91}', '\u{FE90}', ' ', '\u{FE91}', '\u{FE90}']
        );
    }

    // Mock-style tests are covered in the integration test file, where
    // we build a 2-face chain and verify face_idx routing. Unit-level
    // testing here is awkward because Face requires real TTF bytes.
}