oxideav_scribe/face_chain.rs
1//! `FaceChain` — ordered list of faces consulted in priority order
2//! when shaping. Round-2 fallback support: when the primary face
3//! doesn't have a glyph for a codepoint, the chain walks down the list
4//! until one does, falling back to the primary's `.notdef` only if no
5//! face provides a glyph.
6//!
7//! ## Design
8//!
9//! Each codepoint is mapped to `(face_idx, glyph_id)` independently;
10//! ligature substitution + kerning then run on the resulting glyph runs
11//! face-by-face (because GSUB / GPOS lookups are face-local — you can't
12//! ligate an "f" from face A with an "i" from face B).
13//!
14//! The fallback decision is "first face where `glyph_index` returns a
15//! non-zero, defined glyph". A returned `0` is treated as `.notdef`
16//! (i.e. *not present*) because the primary face's `.notdef` is what
17//! we'd use as the *final* fallback anyway, and skipping over it lets
18//! a fallback face provide a real glyph.
19//!
20//! ## Cache key impact
21//!
22//! `PositionedGlyph::face_idx` lets the rasterizer pick the right face
23//! out of the chain. The cache key already keys by `face_id` (per-face,
24//! globally unique), so a glyph from face[0] and a glyph from face[1]
25//! with the same numerical `glyph_id` never collide.
26
27use crate::face::Face;
28use crate::shaper::{shape_run_with_font, PositionedGlyph};
29use crate::shaping::arabic::{compute_forms, script_of, Script};
30use crate::shaping::arabic_pf::presentation_form;
31use crate::shaping::indic::{
32 cluster_boundaries_with, reorder_cluster_with, script_indic_tags, IndicCategory, ReorderRules,
33 BENGALI_RULES, DEVANAGARI_RULES, GUJARATI_RULES, GURMUKHI_RULES, KANNADA_RULES, KHMER_RULES,
34 MALAYALAM_RULES, ORIYA_RULES, SINHALA_RULES, TAMIL_RULES, TELUGU_RULES, THAI_RULES,
35};
36use crate::style::Style;
37use crate::Error;
38
/// Ordered chain of faces consulted in priority order when shaping.
///
/// Index 0 is the primary face; the face at index N is consulted only
/// if every face in `0..N` returned `.notdef` for a codepoint.
#[derive(Debug)]
pub struct FaceChain {
    /// Faces in priority order; `faces[0]` is the primary. A chain built
    /// with [`FaceChain::new`] starts with exactly one face and only
    /// grows through [`FaceChain::push_fallback`].
    faces: Vec<Face>,
}
45
46impl FaceChain {
47 /// Build a chain from a single primary face. Use
48 /// [`FaceChain::push_fallback`] (chainable) to append fallbacks.
49 pub fn new(primary: Face) -> Self {
50 Self {
51 faces: vec![primary],
52 }
53 }
54
55 /// Append a fallback face to the end of the chain. Builder-style:
56 /// `FaceChain::new(latin).push_fallback(cjk).push_fallback(emoji)`.
57 #[must_use]
58 pub fn push_fallback(mut self, face: Face) -> Self {
59 self.faces.push(face);
60 self
61 }
62
63 /// Number of faces in the chain (including the primary).
64 pub fn len(&self) -> usize {
65 self.faces.len()
66 }
67
68 /// True if the chain has no faces — never the case for chains
69 /// constructed via [`FaceChain::new`], present only because clippy
70 /// rightly complains when `len()` exists alone.
71 pub fn is_empty(&self) -> bool {
72 self.faces.is_empty()
73 }
74
75 /// Borrow face at `idx`. Panics if `idx >= len()` — the rasterizer
76 /// always reads `face_idx` from a `PositionedGlyph` produced by
77 /// this chain so the index is bounded by construction.
78 pub fn face(&self, idx: u16) -> &Face {
79 &self.faces[idx as usize]
80 }
81
82 /// Borrow the primary face — useful for size/metric queries.
83 pub fn primary(&self) -> &Face {
84 &self.faces[0]
85 }
86
87 /// Mutably borrow face at `idx`. Used to flip per-face state like
88 /// variation coordinates without rebuilding the chain. Panics if
89 /// `idx >= len()`.
90 pub fn face_mut(&mut self, idx: usize) -> &mut Face {
91 &mut self.faces[idx]
92 }
93
94 /// Set the variation coordinates on the **primary face** (index 0).
95 /// Convenience wrapper around [`Face::set_variation_coords`] for
96 /// the common case of "shape this run at `wght=600 / wdth=125`".
97 /// Mirrors [`Face::set_variation_coords`]'s clamp + length cap and
98 /// returns its error variant unchanged.
99 ///
100 /// Fallback faces in the chain are NOT touched — call
101 /// [`FaceChain::face_mut`] explicitly if a fallback also needs
102 /// variation coords (rare in practice; fallback faces typically
103 /// cover a different script and are loaded from a static cut).
104 pub fn set_variation_coords(&mut self, coords: &[f32]) -> Result<(), Error> {
105 self.faces[0].set_variation_coords(coords)
106 }
107
108 /// Named instances published by the face at `face_index`. Empty
109 /// vec when the face is static / OTF, or when the index is out of
110 /// range. Mirrors [`Face::named_instances`] for the chosen face.
111 pub fn named_instances(&self, face_index: usize) -> Vec<crate::NamedInstance> {
112 self.faces
113 .get(face_index)
114 .map(|f| f.named_instances())
115 .unwrap_or_default()
116 }
117
118 /// Variation axes published by the face at `face_index`. Empty vec
119 /// when the face is static / OTF, or when the index is out of
120 /// range. Mirrors [`Face::variation_axes`] for the chosen face.
121 pub fn variation_axes(&self, face_index: usize) -> Vec<crate::VariationAxis> {
122 self.faces
123 .get(face_index)
124 .map(|f| f.variation_axes())
125 .unwrap_or_default()
126 }
127
128 /// Shape `text` with full chain fallback at default style (upright,
129 /// regular).
130 pub fn shape(&self, text: &str, size_px: f32) -> Result<Vec<PositionedGlyph>, Error> {
131 self.shape_styled(text, size_px, Style::REGULAR)
132 }
133
134 /// Shape `text` honouring `style` (italic / weight). The shear
135 /// derived from `style` is applied at rasterise-time, not at
136 /// shape-time — so glyph positions / advances stay identical
137 /// regardless of italic. This matches what desktop shapers do
138 /// (synthesised italic doesn't change the metrics).
139 pub fn shape_styled(
140 &self,
141 text: &str,
142 size_px: f32,
143 _style: Style,
144 ) -> Result<Vec<PositionedGlyph>, Error> {
145 if text.is_empty() || size_px <= 0.0 {
146 return Ok(Vec::new());
147 }
148
149 // Step 1: per-codepoint, decide which face owns this glyph.
150 // Result: Vec<(face_idx, glyph_id)>.
151 let assigned = self.assign_codepoints(text)?;
152 if assigned.is_empty() {
153 return Ok(Vec::new());
154 }
155
156 // Step 2: walk runs of consecutive (face_idx) glyphs and shape
157 // each run within the appropriate face. Each run gets its own
158 // GSUB + GPOS pass.
159 let mut out: Vec<PositionedGlyph> = Vec::with_capacity(assigned.len());
160 let mut run_start = 0usize;
161 while run_start < assigned.len() {
162 let face_idx = assigned[run_start].0;
163 let mut run_end = run_start + 1;
164 while run_end < assigned.len() && assigned[run_end].0 == face_idx {
165 run_end += 1;
166 }
167 let gids: Vec<u16> = assigned[run_start..run_end].iter().map(|p| p.1).collect();
168 let face = &self.faces[face_idx as usize];
169 let mut run_glyphs =
170 face.with_font(|font| shape_run_with_font(font, &gids, size_px, face_idx))?;
171 out.append(&mut run_glyphs);
172 run_start = run_end;
173 }
174 Ok(out)
175 }
176
177 /// Per-codepoint face assignment. For every char in `text`, walk
178 /// the chain and pick the first face whose `glyph_index` returns a
179 /// non-zero glyph id. If none does, fall back to face 0 with glyph
180 /// 0 (.notdef) — measurement still works, the user sees tofu.
181 ///
182 /// **Round 7 — Arabic contextual joining.** Before cmap lookup the
183 /// input chars are run through the joining state machine in
184 /// [`crate::shaping::arabic`] and Arabic letters are translated to
185 /// their Presentation Forms-B equivalents (U+FE70..U+FEFF). Forms
186 /// the active face doesn't have a glyph for fall back to the
187 /// original base codepoint — so the worst case is "render in
188 /// isolated form" (the round-6 behaviour), which is the right
189 /// graceful degradation for a font that ships only base glyphs.
190 ///
191 /// **Round 8 — Devanagari cluster reorder.** Devanagari runs are
192 /// segmented into clusters (one base consonant with its halant
193 /// chains, matras, and modifiers) and each cluster is rewritten
194 /// to its visual order: pre-base matras (U+093F) move to the
195 /// front of the cluster so a cmap-only font still draws the
196 /// cluster correctly. Reph identification is performed but the
197 /// actual glyph substitution is gated on the `rphf` GSUB feature
198 /// (followup once `oxideav-ttf` exposes feature-tagged single
199 /// substitution).
200 fn assign_codepoints(&self, text: &str) -> Result<Vec<(u16, u16)>, Error> {
201 let chars: Vec<char> = text.chars().collect();
202 // Three pre-cmap shaping passes: Arabic joining → Indic
203 // cluster reorder → reph GSUB (post-cmap, deferred to after
204 // the codepoint→glyph assignment loop). The scripts are
205 // pairwise disjoint so the order doesn't matter for any one;
206 // we run Arabic first to match the round-7 behaviour exactly
207 // when no Indic input is present.
208 let arabic_shaped = apply_arabic_joining(&chars);
209 let (shaped_chars, reph_marks, cluster_spans) = apply_indic_reorder(&arabic_shaped);
210 let mut out: Vec<(u16, u16)> = Vec::with_capacity(shaped_chars.len());
211 for (orig_idx, ch) in shaped_chars.iter().enumerate() {
212 let mut found: Option<(u16, u16)> = None;
213 for (idx, face) in self.faces.iter().enumerate() {
214 let g = face.with_font(|font| font.glyph_index(*ch))?;
215 match g {
216 Some(gid) if gid != 0 => {
217 found = Some((idx as u16, gid));
218 break;
219 }
220 _ => continue,
221 }
222 }
223 // If the substituted presentation-form is missing in every
224 // face, retry with the corresponding original (base)
225 // codepoint — this is the graceful fallback the doc-comment
226 // describes. The Indic pass only reorders (no
227 // substitution), so the original char is already present
228 // somewhere in `chars`; the Arabic pass substitutes 1:1
229 // so `chars[orig_idx]` is the right base when the
230 // permutation is identity. Use position equality where
231 // possible, fall back to char-value lookup otherwise.
232 if found.is_none() {
233 let orig = if orig_idx < chars.len() && *ch != chars[orig_idx] {
234 Some(chars[orig_idx])
235 } else if !chars.contains(ch) {
236 None
237 } else {
238 // The shaped char is present in the original input
239 // — no substitution happened, so the retry is a
240 // no-op (the same lookup we already did failed).
241 None
242 };
243 if let Some(orig) = orig {
244 for (idx, face) in self.faces.iter().enumerate() {
245 let g = face.with_font(|font| font.glyph_index(orig))?;
246 if let Some(gid) = g {
247 if gid != 0 {
248 found = Some((idx as u16, gid));
249 break;
250 }
251 }
252 }
253 }
254 }
255 // No face had it — render as primary's .notdef.
256 out.push(found.unwrap_or((0, 0)));
257 }
258 // Apply reph GSUB substitution: for each `RephMark` we identified
259 // in the pre-cmap pass, look up the `rphf` feature on the
260 // *assigned* face for the RA glyph, apply LookupType 1, and if
261 // a substitute is returned, rewrite the RA gid + drop the halant
262 // by replacing the halant slot with the same gid as the
263 // following base consonant — i.e. the cluster collapses
264 // [reph_gid, halant, base, ...] → [reph_gid, base, base, ...]
265 // and the duplicate base is removed below. Marks for which the
266 // active face publishes no `rphf` lookup are silently skipped
267 // (the cluster falls back to the in-line RA + halant + base
268 // rendering, which is the round-8 behaviour).
269 //
270 // We process marks back-to-front so the index manipulation
271 // stays straightforward (no shifting of pending marks).
272 let (out, dropped_halants) = self.apply_reph_substitutions(out, &reph_marks)?;
273 // Round 11 — cluster-position-aware GSUB pass. For every Indic
274 // cluster, dispatch `half` / `pref` / `blwf` / `abvf` / `pstf`
275 // (per-position substitution) on halant-suffixed conjunct
276 // components, then `pres` / `psts` / `abvs` / `blws` (cluster-
277 // wide presentation features) on every glyph in the cluster.
278 // Coverage misses pass through unchanged so a font without a
279 // given lookup degrades gracefully (the round-10 behaviour).
280 //
281 // The reph pass may have removed halant glyphs from `out`; the
282 // `dropped_halants` Vec tells us which post-reorder character
283 // indices are now absent so we can shift cluster span ends
284 // accordingly. (Reph removal touches the END of a cluster's
285 // first 3 chars, never its boundary, so the START index is
286 // always still valid.)
287 let adjusted_spans = adjust_cluster_spans(&cluster_spans, &dropped_halants, &shaped_chars);
288 let out = self.apply_cluster_position_substitutions(out, &adjusted_spans, &shaped_chars)?;
289 Ok(out)
290 }
291
292 /// For each `RephMark`, apply the active face's `rphf` GSUB lookup
293 /// to the RA glyph and drop the halant glyph if a substitute is
294 /// returned. Marks for which no `rphf` lookup applies pass through
295 /// unchanged.
296 ///
297 /// Returns the rewritten glyph list AND a list of the post-reorder
298 /// character indices whose corresponding halant glyph was removed
299 /// from the run. The cluster-position GSUB pass downstream uses
300 /// this to shift cluster span end indices.
301 fn apply_reph_substitutions(
302 &self,
303 glyphs: Vec<(u16, u16)>,
304 marks: &[RephMark],
305 ) -> Result<RephSubstResult, Error> {
306 if marks.is_empty() {
307 return Ok((glyphs, Vec::new()));
308 }
309 // Process marks back-to-front so earlier RA / halant indices
310 // stay stable while we splice out the halant slots.
311 let mut out = glyphs;
312 let mut dropped: Vec<usize> = Vec::new();
313 for mark in marks.iter().rev() {
314 // Bounds + face-coverage sanity. The reph mark records the
315 // RA's index into the post-reorder character stream which
316 // matches `out` 1:1.
317 if mark.ra_idx >= out.len() || mark.halant_idx >= out.len() {
318 continue;
319 }
320 let (ra_face_idx, ra_gid) = out[mark.ra_idx];
321 // The reph substitution only fires when the RA glyph
322 // actually came from an in-chain face (not .notdef on face
323 // 0 because no face had it).
324 if ra_gid == 0 {
325 continue;
326 }
327 let face = &self.faces[ra_face_idx as usize];
328 // Look up the script's `rphf` feature on this face. We try
329 // the modern Indic2 tag first (`dev2` / `bng2`), then the
330 // legacy v1 tag (`deva` / `beng`) for older fonts.
331 let (modern, legacy) = match script_indic_tags(mark.script) {
332 Some(p) => p,
333 None => continue,
334 };
335 let new_ra = face.with_font(|font| {
336 let mut substitute: Option<u16> = None;
337 for tag in [modern, legacy] {
338 let features = font.gsub_features_for_script(tag, None);
339 for feat in features {
340 if feat.tag == *b"rphf" {
341 for &lookup_idx in &feat.lookup_indices {
342 if let Some(g) = font.gsub_apply_lookup_type_1(lookup_idx, ra_gid) {
343 substitute = Some(g);
344 break;
345 }
346 }
347 if substitute.is_some() {
348 break;
349 }
350 }
351 }
352 if substitute.is_some() {
353 break;
354 }
355 }
356 substitute
357 })?;
358 if let Some(reph_gid) = new_ra {
359 // Rewrite the RA glyph to its reph form.
360 out[mark.ra_idx] = (ra_face_idx, reph_gid);
361 // Drop the halant glyph (it's redundant once the reph
362 // is in place — the visual reph mark stands in for the
363 // RA + halant pair). We splice it out of the assigned
364 // glyph list so the downstream shaper sees the
365 // collapsed cluster.
366 if mark.halant_idx < out.len() {
367 out.remove(mark.halant_idx);
368 dropped.push(mark.halant_idx);
369 }
370 }
371 }
372 Ok((out, dropped))
373 }
374
375 /// Round-11 cluster-position-aware GSUB pass. For every Indic
376 /// cluster span, dispatch the position-driven GSUB features:
377 ///
378 /// - **`half`** — applied to a base consonant immediately followed
379 /// by a halant when the cluster has more characters after the
380 /// halant (i.e. the consonant is in the *non-final* slot of a
381 /// conjunct — its inherent vowel is suppressed and a "half form"
382 /// glyph is the canonical shape).
383 /// - **`pref` / `blwf` / `abvf` / `pstf`** — applied to a consonant
384 /// that follows a halant. The Telugu/Kannada/Malayalam family
385 /// distinguishes the position by glyph; we try `pref` first,
386 /// then `blwf`, `abvf`, `pstf` in that order. The first lookup
387 /// that returns a substitute wins. (Coverage misses pass through
388 /// unchanged — the ordering picks the position the font has a
389 /// form for.)
390 /// - **`pres` / `psts` / `abvs` / `blws`** — presentation-pass
391 /// single substitutions; applied to every glyph in the cluster
392 /// (each glyph independently — coverage misses pass through).
393 ///
394 /// Faces without any of the above lookups for the cluster's script
395 /// degrade to the round-10 behaviour (just `rphf`).
396 fn apply_cluster_position_substitutions(
397 &self,
398 glyphs: Vec<(u16, u16)>,
399 spans: &[ClusterSpan],
400 chars: &[char],
401 ) -> Result<Vec<(u16, u16)>, Error> {
402 if spans.is_empty() {
403 return Ok(glyphs);
404 }
405 let mut out = glyphs;
406 for span in spans {
407 // Bound the span to the current `out` length — reph drops
408 // may have shifted the end down; downstream
409 // `adjust_cluster_spans` clamps but be defensive.
410 let end = span.end.min(out.len());
411 if span.start >= end {
412 continue;
413 }
414 // Resolve the script's GSUB script tag pair.
415 let (modern, legacy) = match script_indic_tags(span.script) {
416 Some(p) => p,
417 None => continue,
418 };
419 // Per-position substitution for halant-suffixed conjunct
420 // components. Walk the cluster's chars (post-reorder) and
421 // identify (a) base + halant pairs (`half`) and (b)
422 // post-halant consonants (`pref` / `blwf` / `abvf` /
423 // `pstf`). The chars vec is bounded by the pre-reph
424 // cluster's char positions; reph drops shift the END down
425 // by 1 per drop but not the START — clamp to chars.len().
426 let chars_end = span.end.min(chars.len());
427 let cat_of =
428 |i: usize| -> IndicCategory { indic_category_for_script(span.script, chars[i]) };
429 let mut pos = span.start;
430 while pos < chars_end {
431 let here = cat_of(pos);
432 if here == IndicCategory::Consonant && pos + 1 < chars_end {
433 let next = cat_of(pos + 1);
434 if next == IndicCategory::Halant {
435 let glyph_idx = pos.min(out.len().saturating_sub(1));
436 let after_halant = pos + 2;
437 // `half` form fires when there's anything after
438 // the halant in the cluster.
439 if after_halant < chars_end && glyph_idx < out.len() {
440 self.try_apply_single_subst(
441 &mut out, glyph_idx, modern, legacy, b"half",
442 )?;
443 }
444 // The post-halant consonant (if any) gets the
445 // pref / blwf / abvf / pstf cascade. We pick
446 // the first feature whose lookup covers the
447 // gid — the font's form-position table dictates
448 // which one wins.
449 if after_halant < chars_end
450 && cat_of(after_halant) == IndicCategory::Consonant
451 && after_halant < out.len()
452 {
453 for tag in [b"pref", b"blwf", b"abvf", b"pstf"] {
454 if self.try_apply_single_subst(
455 &mut out,
456 after_halant,
457 modern,
458 legacy,
459 tag,
460 )? {
461 break;
462 }
463 }
464 }
465 }
466 }
467 pos += 1;
468 }
469 // Presentation-pass single substitutions over every glyph
470 // in the cluster.
471 for tag in [b"pres", b"psts", b"abvs", b"blws"] {
472 for idx in span.start..end {
473 if idx < out.len() {
474 self.try_apply_single_subst(&mut out, idx, modern, legacy, tag)?;
475 }
476 }
477 }
478 }
479 Ok(out)
480 }
481
482 /// Attempt to apply a feature-tagged single substitution
483 /// (LookupType 1) to `out[glyph_idx]`. Walks the modern Indic2 tag
484 /// first then the legacy v1 tag; returns `Ok(true)` when a
485 /// substitution was applied, `Ok(false)` otherwise. Glyphs whose
486 /// owning face publishes no matching feature pass through silently.
487 fn try_apply_single_subst(
488 &self,
489 out: &mut [(u16, u16)],
490 glyph_idx: usize,
491 modern: [u8; 4],
492 legacy: [u8; 4],
493 feature_tag: &[u8; 4],
494 ) -> Result<bool, Error> {
495 let (face_idx, gid) = out[glyph_idx];
496 if gid == 0 {
497 return Ok(false);
498 }
499 let face = &self.faces[face_idx as usize];
500 let new_gid = face.with_font(|font| {
501 for tag in [modern, legacy] {
502 let features = font.gsub_features_for_script(tag, None);
503 for feat in features {
504 if &feat.tag == feature_tag {
505 for &lookup_idx in &feat.lookup_indices {
506 if let Some(g) = font.gsub_apply_lookup_type_1(lookup_idx, gid) {
507 return Some(g);
508 }
509 }
510 }
511 }
512 }
513 None
514 })?;
515 if let Some(g) = new_gid {
516 out[glyph_idx] = (face_idx, g);
517 Ok(true)
518 } else {
519 Ok(false)
520 }
521 }
522}
523
/// Return type of [`FaceChain::apply_reph_substitutions`]: the
/// rewritten `(face_idx, glyph_id)` list, plus the post-reorder
/// character indices whose halant glyph was removed from that list.
type RephSubstResult = (Vec<(u16, u16)>, Vec<usize>);
528
/// Sidecar info recorded by [`apply_indic_reorder`] for every cluster
/// whose `has_reph` flag was set. Carries the indices into the
/// post-reorder character stream of the leading RA and of the halant
/// immediately after it, plus the originating script.
///
/// Consumed by [`FaceChain::apply_reph_substitutions`], which tries the
/// `rphf` GSUB feature on the face that owns the RA glyph and, on
/// success, rewrites the RA gid and removes the halant glyph.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct RephMark {
    /// Index of the RA character in the post-reorder stream.
    ra_idx: usize,
    /// Index of the halant character in the post-reorder stream.
    /// [`apply_indic_reorder`] always records `ra_idx + 1`; it is
    /// stored explicitly so the splice operation is unambiguous.
    halant_idx: usize,
    /// Script the cluster originated from. Selects the OpenType script
    /// tag pair (`dev2` / `deva` etc.) for the GSUB lookup.
    script: Script,
}
549
/// Per-cluster span recorded by [`apply_indic_reorder`] so the post-cmap
/// cluster-position-aware GSUB pass knows which glyphs belong to which
/// Indic cluster and what script they came from.
///
/// Consumed by [`FaceChain::apply_cluster_position_substitutions`],
/// which dispatches `half` / `pref` / `blwf` / `abvf` / `pstf` for
/// halant-suffixed conjunct components, and `pres` / `psts` / `abvs` /
/// `blws` as presentation-pass single substitutions for every glyph in
/// the cluster.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ClusterSpan {
    /// Inclusive start index in the post-reorder character stream
    /// (which matches the assigned-glyphs list 1:1 until the reph pass
    /// removes halant glyphs).
    start: usize,
    /// Exclusive end index in the post-reorder character stream.
    end: usize,
    /// Script the cluster originated from. Selects the OpenType script
    /// tag pair (`dev2` / `deva` etc.) for the GSUB lookup.
    script: Script,
}
570
571/// Pre-cmap Indic shaping pass: walk `chars`, find contiguous runs of
572/// Indic codepoints (any script we have rules for), segment each run
573/// into orthographic clusters, and apply per-script
574/// [`reorder_cluster_with`] to each (pre-base matra reorder + reph
575/// flagging). Non-Indic characters pass through untouched.
576///
577/// Returns the reordered char stream plus a list of [`RephMark`]
578/// sidecar entries (one per cluster with `ClusterFlags::has_reph`)
579/// PLUS a list of [`ClusterSpan`] entries (one per Indic cluster) so
580/// the cluster-position-aware GSUB pass downstream can dispatch
581/// per-position lookups.
582///
583/// Indices in the returned [`RephMark`]s and [`ClusterSpan`]s are into
584/// the returned character stream (the post-reorder one).
585fn apply_indic_reorder(chars: &[char]) -> (Vec<char>, Vec<RephMark>, Vec<ClusterSpan>) {
586 if chars.is_empty() {
587 return (Vec::new(), Vec::new(), Vec::new());
588 }
589 let mut out: Vec<char> = Vec::with_capacity(chars.len());
590 let mut reph_marks: Vec<RephMark> = Vec::new();
591 let mut spans: Vec<ClusterSpan> = Vec::new();
592 let mut i = 0;
593 while i < chars.len() {
594 // Walk a maximal Indic-of-one-script run starting at `i`.
595 let run_script = script_of(chars[i]);
596 let rules = match indic_rules_for_script(run_script) {
597 Some(r) => r,
598 None => {
599 // Non-Indic code point — pass through.
600 out.push(chars[i]);
601 i += 1;
602 continue;
603 }
604 };
605 let run_start = i;
606 while i < chars.len() && script_of(chars[i]) == run_script {
607 i += 1;
608 }
609 let run = &chars[run_start..i];
610 // Cluster this run + reorder each cluster.
611 let bounds = cluster_boundaries_with(run, rules.category);
612 for (s, e) in bounds {
613 let cluster = &run[s..e];
614 let (reordered, flags) = reorder_cluster_with(cluster, rules);
615 let cluster_start = out.len();
616 // Record the reph mark BEFORE we extend `out` with the
617 // reordered cluster — `out.len()` at this point is the
618 // index of the RA glyph in the post-reorder stream.
619 if flags.has_reph {
620 // The reph rule guarantees the leading RA + halant +
621 // consonant sit at positions 0 + 1 + 2 of the cluster
622 // (pre-base matras don't reorder past the leading
623 // characters; the matra moves to position 0 only when
624 // the matra is itself in the cluster — but the reph
625 // detection in `reorder_cluster_with` checks the
626 // ORIGINAL cluster, not the reordered one). To keep
627 // the indexing unambiguous, assert that the cluster's
628 // post-reorder layout has RA + halant at the cluster
629 // start when flags.has_reph is set AND no pre-base
630 // matra was reordered. When a pre-base matra DID
631 // reorder, the RA + halant sit at positions 1 + 2
632 // (after the matra at position 0).
633 let ra_offset = if flags.pre_base_reordered { 1 } else { 0 };
634 let ra_idx = cluster_start + ra_offset;
635 let halant_idx = ra_idx + 1;
636 reph_marks.push(RephMark {
637 ra_idx,
638 halant_idx,
639 script: run_script,
640 });
641 }
642 out.extend_from_slice(&reordered);
643 // Record the cluster span (inclusive..exclusive) for the
644 // cluster-position-aware GSUB pass downstream.
645 spans.push(ClusterSpan {
646 start: cluster_start,
647 end: out.len(),
648 script: run_script,
649 });
650 }
651 }
652 (out, reph_marks, spans)
653}
654
655/// Shift a list of [`ClusterSpan`]s after the reph pass dropped some
656/// halant glyphs. Reph drop occurs at index `halant_idx` in the
657/// post-reorder stream which corresponds to the SECOND character of a
658/// reph cluster (positions 1 / 2 of the cluster, depending on whether a
659/// pre-base matra reordered). The drop:
660/// - shifts the END index of the affected span down by 1 (one fewer
661/// glyph in this cluster);
662/// - shifts the START + END indices of every subsequent span down by 1.
663///
664/// The cluster char positions in the chars vec are unchanged — only
665/// the GLYPH indices in `out` shift. We track this by computing per-
666/// span how many drops happened at-or-before its start (shift_start)
667/// and at-or-before its end-1 (shift_end).
668fn adjust_cluster_spans(
669 spans: &[ClusterSpan],
670 dropped: &[usize],
671 chars: &[char],
672) -> Vec<ClusterSpan> {
673 if dropped.is_empty() {
674 return spans.to_vec();
675 }
676 // Count drops strictly before `idx`.
677 let drops_before = |idx: usize| -> usize { dropped.iter().filter(|&&d| d < idx).count() };
678 spans
679 .iter()
680 .map(|s| {
681 let new_start = s.start.saturating_sub(drops_before(s.start));
682 let new_end = s.end.saturating_sub(drops_before(s.end));
683 ClusterSpan {
684 start: new_start.min(chars.len()),
685 end: new_end.min(chars.len()),
686 script: s.script,
687 }
688 })
689 .collect()
690}
691
692/// Look up a script-specific [`IndicCategory`] for `ch`. Used by the
693/// cluster-position GSUB pass to identify halant chains within a
694/// cluster span. Returns [`IndicCategory::Other`] for non-Indic scripts.
695fn indic_category_for_script(script: Script, ch: char) -> IndicCategory {
696 use crate::shaping::indic;
697 match script {
698 Script::Devanagari => indic::devanagari_category(ch),
699 Script::Bengali => indic::bengali_category(ch),
700 Script::Tamil => indic::tamil_category(ch),
701 Script::Gurmukhi => indic::gurmukhi_category(ch),
702 Script::Gujarati => indic::gujarati_category(ch),
703 Script::Telugu => indic::telugu_category(ch),
704 Script::Kannada => indic::kannada_category(ch),
705 Script::Malayalam => indic::malayalam_category(ch),
706 Script::Oriya => indic::oriya_category(ch),
707 Script::Sinhala => indic::sinhala_category(ch),
708 Script::Khmer => indic::khmer_category(ch),
709 Script::Thai => indic::thai_category(ch),
710 _ => IndicCategory::Other,
711 }
712}
713
714/// Map a [`Script`] to its Indic [`ReorderRules`], if any. Used by
715/// the per-codepoint Indic dispatch in [`apply_indic_reorder`].
716fn indic_rules_for_script(script: Script) -> Option<&'static ReorderRules> {
717 match script {
718 Script::Devanagari => Some(&DEVANAGARI_RULES),
719 Script::Bengali => Some(&BENGALI_RULES),
720 Script::Tamil => Some(&TAMIL_RULES),
721 Script::Gurmukhi => Some(&GURMUKHI_RULES),
722 Script::Gujarati => Some(&GUJARATI_RULES),
723 Script::Telugu => Some(&TELUGU_RULES),
724 Script::Kannada => Some(&KANNADA_RULES),
725 Script::Malayalam => Some(&MALAYALAM_RULES),
726 Script::Oriya => Some(&ORIYA_RULES),
727 Script::Sinhala => Some(&SINHALA_RULES),
728 Script::Khmer => Some(&KHMER_RULES),
729 Script::Thai => Some(&THAI_RULES),
730 _ => None,
731 }
732}
733
734/// Pre-cmap Arabic shaping pass: walk `chars`, find contiguous runs of
735/// Arabic codepoints, run the joining state machine on each run, and
736/// translate joining-aware base letters to their Arabic Presentation
737/// Forms-B equivalents. Non-Arabic codepoints pass through untouched.
738///
739/// This sits *before* face-chain cmap lookup so a font that only
740/// supports the FE70..FEFF block (most desktop fonts) still gets
741/// visually-correct contextual shapes. Faces that lack the
742/// presentation-form glyph fall back via the retry path in
743/// [`FaceChain::assign_codepoints`].
744fn apply_arabic_joining(chars: &[char]) -> Vec<char> {
745 if chars.is_empty() {
746 return Vec::new();
747 }
748 let mut out: Vec<char> = Vec::with_capacity(chars.len());
749 let mut i = 0;
750 while i < chars.len() {
751 // Walk a maximal Arabic-only run starting at `i`.
752 let run_start = i;
753 while i < chars.len() && script_of(chars[i]) == Script::Arabic {
754 i += 1;
755 }
756 if i > run_start {
757 let run = &chars[run_start..i];
758 let forms = compute_forms(run);
759 for (k, &ch) in run.iter().enumerate() {
760 let translated = presentation_form(ch, forms[k]).unwrap_or(ch);
761 out.push(translated);
762 }
763 }
764 // Pass non-Arabic chars through unchanged.
765 if i < chars.len() && script_of(chars[i]) != Script::Arabic {
766 out.push(chars[i]);
767 i += 1;
768 }
769 }
770 out
771}
772
#[cfg(test)]
#[allow(non_snake_case)] // some test names spell RA / NOT in upper case for emphasis
mod tests {
    use super::{adjust_cluster_spans, apply_arabic_joining, apply_indic_reorder, ClusterSpan};
    use crate::shaping::arabic::Script;

    #[test]
    fn ascii_passes_through_unchanged() {
        // Text without any Arabic letters must come back verbatim.
        let latin = "Hello".chars().collect::<Vec<char>>();
        let joined = apply_arabic_joining(&latin);
        assert_eq!(joined, latin);
    }

    #[test]
    fn devanagari_pre_base_matra_moves_to_front_of_cluster() {
        // KA (U+0915) followed by sign-i (U+093F): the matra is
        // expected ahead of the consonant after reordering.
        let input = vec!['\u{0915}', '\u{093F}'];
        let (reordered, rephs, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{093F}', '\u{0915}']);
        assert!(rephs.is_empty(), "no reph in this cluster");
        assert_eq!(clusters.len(), 1);
        let only = &clusters[0];
        assert_eq!((only.start, only.end), (0, 2));
        assert_eq!(only.script, Script::Devanagari);
    }

    #[test]
    fn devanagari_two_clusters_each_reorder_independently() {
        // KA + sign-i twice in a row: two clusters, and each one pulls
        // its own matra to its own front.
        let input = vec!['\u{0915}', '\u{093F}', '\u{0915}', '\u{093F}'];
        let (reordered, _, clusters) = apply_indic_reorder(&input);
        assert_eq!(
            reordered,
            vec!['\u{093F}', '\u{0915}', '\u{093F}', '\u{0915}']
        );
        assert_eq!(clusters.len(), 2);
        let first = &clusters[0];
        let second = &clusters[1];
        assert_eq!((first.start, first.end), (0, 2));
        assert_eq!((second.start, second.end), (2, 4));
    }

    #[test]
    fn devanagari_conjunct_reorder_keeps_halant_chain_intact() {
        // KA + halant + SSA + sign-i: only the matra moves; the
        // KA-halant-SSA sequence keeps its logical order.
        let input = vec!['\u{0915}', '\u{094D}', '\u{0937}', '\u{093F}'];
        let (reordered, _, _) = apply_indic_reorder(&input);
        assert_eq!(
            reordered,
            vec!['\u{093F}', '\u{0915}', '\u{094D}', '\u{0937}']
        );
    }

    #[test]
    fn ascii_passes_through_indic_reorder_unchanged() {
        // Plain Latin text: nothing is reordered, no reph marks and no
        // cluster spans are reported.
        let latin: Vec<char> = "Hello".chars().collect();
        let (reordered, rephs, clusters) = apply_indic_reorder(&latin);
        assert!(rephs.is_empty());
        assert!(clusters.is_empty());
        assert_eq!(reordered, latin);
    }

    #[test]
    fn mixed_latin_and_devanagari_reorders_only_devanagari_clusters() {
        // 'A' stays where it is; the KA + sign-i that follows is
        // reordered and is the only thing that receives a span.
        let input = vec!['A', '\u{0915}', '\u{093F}'];
        let (reordered, _, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['A', '\u{093F}', '\u{0915}']);
        assert_eq!(clusters.len(), 1);
        let only = &clusters[0];
        assert_eq!((only.start, only.end), (1, 3));
    }

    #[test]
    fn devanagari_reph_emits_reph_mark_at_correct_index() {
        // RA + halant + KA with no matra: nothing moves, so the mark
        // points at RA in slot 0 and the halant in slot 1.
        let input = vec!['\u{0930}', '\u{094D}', '\u{0915}'];
        let (reordered, rephs, _) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{0930}', '\u{094D}', '\u{0915}']);
        assert_eq!(rephs.len(), 1);
        let mark = &rephs[0];
        assert_eq!((mark.ra_idx, mark.halant_idx), (0, 1));
        assert_eq!(mark.script, Script::Devanagari);
    }

    #[test]
    fn devanagari_reph_with_pre_base_matra_shifts_reph_mark_by_one() {
        // RA + halant + KA + sign-i: the matra lands in slot 0, which
        // pushes RA to slot 1 and the halant to slot 2.
        let input = vec!['\u{0930}', '\u{094D}', '\u{0915}', '\u{093F}'];
        let (reordered, rephs, _) = apply_indic_reorder(&input);
        assert_eq!(
            reordered,
            vec!['\u{093F}', '\u{0930}', '\u{094D}', '\u{0915}']
        );
        assert_eq!(rephs.len(), 1);
        let mark = &rephs[0];
        assert_eq!((mark.ra_idx, mark.halant_idx), (1, 2));
    }

    #[test]
    fn bengali_pre_base_matra_e_moves_to_front_of_cluster() {
        // Bengali KA (U+0995) + sign-e (U+09C7): matra goes first.
        let input = vec!['\u{0995}', '\u{09C7}'];
        let (reordered, rephs, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{09C7}', '\u{0995}']);
        assert!(rephs.is_empty());
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].script, Script::Bengali);
    }

    #[test]
    fn bengali_reph_emits_reph_mark_with_bengali_script_tag() {
        // Bengali RA + halant + KA: one reph mark, tagged Bengali.
        let input = vec!['\u{09B0}', '\u{09CD}', '\u{0995}'];
        let (reordered, rephs, _) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{09B0}', '\u{09CD}', '\u{0995}']);
        assert_eq!(rephs.len(), 1);
        assert_eq!(rephs[0].script, Script::Bengali);
    }

    #[test]
    fn tamil_pre_base_matra_e_moves_to_front_of_cluster() {
        // Tamil KA (U+0B95) + sign-e (U+0BC6): matra goes first.
        let input = vec!['\u{0B95}', '\u{0BC6}'];
        let (reordered, rephs, _) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{0BC6}', '\u{0B95}']);
        assert!(rephs.is_empty());
    }

    #[test]
    fn tamil_RA_plus_halant_does_NOT_emit_reph_mark() {
        // Tamil RA + pulli + KA: no reph mark is expected for Tamil.
        let input = vec!['\u{0BB0}', '\u{0BCD}', '\u{0B95}'];
        let (_, rephs, _) = apply_indic_reorder(&input);
        assert!(rephs.is_empty(), "Tamil never sets the reph flag");
    }

    #[test]
    fn mixed_devanagari_and_bengali_runs_segment_independently() {
        // Devanagari KA + sign-i immediately followed by Bengali
        // KA + sign-i. The script change is a cluster boundary, so each
        // matra moves only to the front of its own cluster.
        let input = vec!['\u{0915}', '\u{093F}', '\u{0995}', '\u{09BF}'];
        let (reordered, _, clusters) = apply_indic_reorder(&input);
        assert_eq!(
            reordered,
            vec!['\u{093F}', '\u{0915}', '\u{09BF}', '\u{0995}']
        );
        assert_eq!(clusters.len(), 2);
        assert_eq!(clusters[0].script, Script::Devanagari);
        assert_eq!(clusters[1].script, Script::Bengali);
    }

    // ---------- Round 11 — new scripts ----------

    #[test]
    fn gurmukhi_cluster_reorder_emits_span_with_gurmukhi_script() {
        // Gurmukhi KA (U+0A15) + sign-i (U+0A3F): matra goes first.
        let input = vec!['\u{0A15}', '\u{0A3F}'];
        let (reordered, _, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{0A3F}', '\u{0A15}']);
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].script, Script::Gurmukhi);
    }

    #[test]
    fn gujarati_cluster_reorder_emits_span_with_gujarati_script() {
        // Gujarati KA (U+0A95) + sign-i (U+0ABF): matra goes first.
        let input = vec!['\u{0A95}', '\u{0ABF}'];
        let (reordered, _, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{0ABF}', '\u{0A95}']);
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].script, Script::Gujarati);
    }

    #[test]
    fn telugu_pre_base_matra_e_reorders_with_telugu_span() {
        // Telugu KA (U+0C15) + sign-e (U+0C46): the sign is expected
        // to be moved ahead of the consonant.
        let input = vec!['\u{0C15}', '\u{0C46}'];
        let (reordered, _, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{0C46}', '\u{0C15}']);
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].script, Script::Telugu);
    }

    #[test]
    fn kannada_reph_emits_reph_mark_with_kannada_script_tag() {
        // Kannada RA + halant + KA: one reph mark and one span, both
        // tagged Kannada.
        let input = vec!['\u{0CB0}', '\u{0CCD}', '\u{0C95}'];
        let (_, rephs, clusters) = apply_indic_reorder(&input);
        assert_eq!(rephs.len(), 1);
        assert_eq!(rephs[0].script, Script::Kannada);
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].script, Script::Kannada);
    }

    #[test]
    fn malayalam_RA_plus_halant_does_NOT_emit_reph_mark() {
        // Malayalam RA + halant + KA: a single Malayalam span and no
        // reph mark (modern Malayalam uses chillu forms instead).
        let input = vec!['\u{0D30}', '\u{0D4D}', '\u{0D15}'];
        let (_, rephs, clusters) = apply_indic_reorder(&input);
        assert!(rephs.is_empty());
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].script, Script::Malayalam);
    }

    #[test]
    fn oriya_pre_base_matra_e_reorders_with_oriya_span() {
        // Oriya KA (U+0B15) + sign-e (U+0B47): matra goes first.
        let input = vec!['\u{0B15}', '\u{0B47}'];
        let (reordered, _, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{0B47}', '\u{0B15}']);
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].script, Script::Oriya);
    }

    #[test]
    fn malayalam_chillu_starts_new_cluster_from_following_consonant() {
        // Chillu (U+0D7A) followed by KA (U+0D15): the chillu counts
        // as a consonant, so KA opens a second cluster.
        let input = vec!['\u{0D7A}', '\u{0D15}'];
        let (_, _, clusters) = apply_indic_reorder(&input);
        assert_eq!(clusters.len(), 2);
    }

    // ---------- Round 12 (Brahmic non-Indic) ----------

    #[test]
    fn sinhala_pre_base_matra_reorders_with_sinhala_span() {
        // Sinhala KA (U+0D9A) + sign-e (U+0DD9): matra goes first.
        let input = vec!['\u{0D9A}', '\u{0DD9}'];
        let (reordered, rephs, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{0DD9}', '\u{0D9A}']);
        assert!(rephs.is_empty(), "Sinhala has no reph");
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].script, Script::Sinhala);
    }

    #[test]
    fn sinhala_RA_plus_al_lakuna_does_NOT_emit_reph_mark() {
        // Sinhala RA + al-lakuna + KA: one Sinhala span, no reph mark.
        let input = vec!['\u{0DBB}', '\u{0DCA}', '\u{0D9A}'];
        let (_, rephs, clusters) = apply_indic_reorder(&input);
        assert!(rephs.is_empty());
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].script, Script::Sinhala);
    }

    #[test]
    fn khmer_pre_base_matra_reorders_with_khmer_span() {
        // Khmer KA (U+1780) + sign-e (U+17C1): matra goes first.
        let input = vec!['\u{1780}', '\u{17C1}'];
        let (reordered, _, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, vec!['\u{17C1}', '\u{1780}']);
        assert_eq!(clusters.len(), 1);
        assert_eq!(clusters[0].script, Script::Khmer);
    }

    #[test]
    fn khmer_coeng_keeps_subjoined_chain_in_one_cluster_span() {
        // KA + COENG + KHA + COENG + GA: the whole subjoined chain is
        // one cluster covering all five chars, and with no pre-base
        // matra present the order is left untouched.
        let input = vec!['\u{1780}', '\u{17D2}', '\u{1781}', '\u{17D2}', '\u{1782}'];
        let (reordered, rephs, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, input);
        assert!(rephs.is_empty());
        assert_eq!(clusters.len(), 1);
        let only = &clusters[0];
        assert_eq!((only.start, only.end), (0, 5));
        assert_eq!(only.script, Script::Khmer);
    }

    #[test]
    fn thai_no_reorder_preserves_storage_order() {
        // SARA E (U+0E40) then KO KAI (U+0E01): Thai text is stored in
        // visual order already, so nothing moves and each char ends up
        // in a cluster of its own.
        let input = vec!['\u{0E40}', '\u{0E01}'];
        let (reordered, rephs, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, input);
        assert!(rephs.is_empty());
        assert_eq!(clusters.len(), 2);
        assert_eq!(clusters[0].script, Script::Thai);
        assert_eq!(clusters[1].script, Script::Thai);
    }

    #[test]
    fn thai_consonant_with_above_vowel_and_tone_emits_one_span() {
        // KO KAI + SARA I (above vowel) + MAI THO (tone mark): all
        // three belong to one cluster and keep their order.
        let input = vec!['\u{0E01}', '\u{0E34}', '\u{0E49}'];
        let (reordered, rephs, clusters) = apply_indic_reorder(&input);
        assert_eq!(reordered, input);
        assert!(rephs.is_empty());
        assert_eq!(clusters.len(), 1);
        let only = &clusters[0];
        assert_eq!((only.start, only.end), (0, 3));
        assert_eq!(only.script, Script::Thai);
    }

    #[test]
    fn mixed_devanagari_and_thai_segments_at_script_boundary() {
        // Devanagari KA next to Thai KO KAI: the script change splits
        // them into two separate clusters.
        let input = vec!['\u{0915}', '\u{0E01}'];
        let (_, rephs, clusters) = apply_indic_reorder(&input);
        assert!(rephs.is_empty());
        assert_eq!(clusters.len(), 2);
        assert_eq!(clusters[0].script, Script::Devanagari);
        assert_eq!(clusters[1].script, Script::Thai);
    }

    #[test]
    fn adjust_cluster_spans_shifts_subsequent_spans_after_drop() {
        // Two adjacent Devanagari spans, [0, 3) and [3, 6), over a
        // ten-char placeholder buffer.
        let deva = |start, end| ClusterSpan {
            start,
            end,
            script: Script::Devanagari,
        };
        let chars = vec!['a'; 10];
        let spans = vec![deva(0, 3), deva(3, 6)];
        // Simulate a reph that removed the halant at index 1, which
        // lies inside the first span.
        let dropped: Vec<usize> = vec![1];
        let adjusted = adjust_cluster_spans(&spans, &dropped, &chars);
        // The first span loses one slot at its end ...
        assert_eq!((adjusted[0].start, adjusted[0].end), (0, 2));
        // ... and the second span slides down by one on both sides.
        assert_eq!((adjusted[1].start, adjusted[1].end), (2, 5));
    }

    #[test]
    fn arabic_run_translates_to_presentation_forms() {
        // Three BEH (U+0628) in a row become initial, medial and final
        // presentation forms: U+FE91, U+FE92, U+FE90.
        let input = vec!['\u{0628}', '\u{0628}', '\u{0628}'];
        let joined = apply_arabic_joining(&input);
        assert_eq!(joined, vec!['\u{FE91}', '\u{FE92}', '\u{FE90}']);
    }

    #[test]
    fn arabic_run_with_ascii_separator() {
        // BEH BEH, a space, BEH BEH: the space is copied as-is and
        // splits the text into two runs, each shaped initial + final.
        let input = vec!['\u{0628}', '\u{0628}', ' ', '\u{0628}', '\u{0628}'];
        let joined = apply_arabic_joining(&input);
        assert_eq!(
            joined,
            vec!['\u{FE91}', '\u{FE90}', ' ', '\u{FE91}', '\u{FE90}']
        );
    }

    // Face-routing behaviour (a two-face chain and the resulting
    // face_idx values) is exercised in the integration test file; it is
    // awkward to unit-test here because Face requires real TTF bytes.
}