kas_text/
shaper.rs

1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License in the LICENSE-APACHE file or at:
4//     https://www.apache.org/licenses/LICENSE-2.0
5
6//! Text shaping
7//!
8//! To quote the HarfBuzz manual:
9//!
10//! > Text shaping is the process of translating a string of character codes
11//! > (such as Unicode codepoints) into a properly arranged sequence of glyphs
12//! > that can be rendered onto a screen or into final output form for
13//! > inclusion in a document.
14//!
15//! This module provides the [`shape`] function, which produces a sequence of
16//! [`Glyph`]s based on the given text.
17//!
18//! This module *does not* perform line-breaking, wrapping or text reversal.
19
20use crate::conv::{to_u32, to_usize, DPU};
21use crate::display::RunSpecial;
22use crate::fonts::{self, FaceId};
23use crate::{Range, Vec2};
24use fontique::Script;
25use tinyvec::TinyVec;
26use unicode_bidi::Level;
27
/// Identifier of a glyph within a font face.
///
/// This is a transparent newtype over the raw `u16` glyph index, used so that
/// glyph identifiers cannot be confused with other integers.
#[repr(transparent)]
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct GlyphId(pub u16);
32
33/// A positioned glyph
34#[derive(Clone, Copy, Debug)]
35pub struct Glyph {
36    /// Index of char in source text
37    pub index: u32,
38    /// Glyph identifier in font
39    pub id: GlyphId,
40    /// Position of glyph
41    pub position: Vec2,
42}
43
/// A soft-break location within a glyph run
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct GlyphBreak {
    /// Index of char in source text
    pub index: u32,
    /// Position in sequence of glyphs
    pub gi: u32,
    /// End position of previous "word" excluding space
    pub no_space_end: f32,
}

impl GlyphBreak {
    /// Construct from the source-text index alone
    ///
    /// The remaining fields are filled with placeholder values
    /// (`u32::MAX` and `NaN`); the shaper assigns them later.
    pub(crate) fn new(index: u32) -> Self {
        Self {
            index,
            gi: u32::MAX,
            no_space_end: f32::NAN,
        }
    }
}
65
/// Horizontal metrics of a range of parts within a glyph run
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub(crate) struct PartMetrics {
    /// The distance from the origin to the start of the left-most part
    pub offset: f32,
    /// Length, not counting whitespace
    pub len_no_space: f32,
    /// Length, counting trailing whitespace
    pub len: f32,
}
75
76/// A glyph run
77///
78/// A glyph run is a sequence of glyphs, starting from the origin: 0.0.
79/// Whether the run is left-to-right text or right-to-left, glyphs are
80/// positioned between 0.0 and `run.caret` (usually with some internal
81/// margin due to side bearings — though this could even be negative).
82/// The first glyph in the run should not be invisible (space) except where the
83/// run occurs at the start of a line with explicit initial spacing, however
84/// the run may end with white-space. `no_space_end` gives the "caret" position
85/// of the *logical* end of the run, excluding white-space (for right-to-left
86/// text, this is the end nearer the origin than `caret`).
87#[derive(Clone, Debug)]
88pub(crate) struct GlyphRun {
89    /// Range in source text
90    pub range: Range,
91    /// Font size (pixels/em)
92    pub dpem: f32,
93    pub dpu: DPU,
94
95    /// Font face identifier
96    pub face_id: FaceId,
97    /// Tab or no-break property
98    pub special: RunSpecial,
99    /// BIDI level
100    pub level: Level,
101    /// Script
102    pub script: Script,
103
104    /// Sequence of all glyphs, in left-to-right order
105    pub glyphs: Vec<Glyph>,
106    /// All soft-breaks within this run, in left-to-right order
107    ///
108    /// Note: it would be equivalent to use a separate `Run` for each sub-range
109    /// in the text instead of tracking breaks via this field.
110    pub breaks: TinyVec<[GlyphBreak; 4]>,
111
112    /// End position, excluding whitespace
113    ///
114    /// Use [`GlyphRun::start_no_space`] or [`GlyphRun::end_no_space`].
115    pub no_space_end: f32,
116    /// Position of next glyph, if this run is followed by another
117    pub caret: f32,
118}
119
120impl GlyphRun {
121    /// Number of parts
122    ///
123    /// Parts are in logical order
124    pub fn num_parts(&self) -> usize {
125        self.breaks.len() + 1
126    }
127
128    /// Calculate lengths for a part range
129    ///
130    /// Parts are identified in logical order with end index up to
131    /// `self.num_parts()`.
132    pub fn part_lengths(&self, range: std::ops::Range<usize>) -> PartMetrics {
133        // TODO: maybe we should adjust self.breaks to clean this up?
134        assert!(range.start <= range.end);
135
136        let mut part = PartMetrics::default();
137        if self.level.is_ltr() {
138            if range.end > 0 {
139                part.len_no_space = self.no_space_end;
140                part.len = self.caret;
141                if range.end <= self.breaks.len() {
142                    let b = self.breaks[range.end - 1];
143                    part.len_no_space = b.no_space_end;
144                    if to_usize(b.gi) < self.glyphs.len() {
145                        part.len = self.glyphs[to_usize(b.gi)].position.0
146                    }
147                }
148            }
149
150            if range.start > 0 {
151                let glyph = to_usize(self.breaks[range.start - 1].gi);
152                part.offset = self.glyphs[glyph].position.0;
153                part.len_no_space -= part.offset;
154                part.len -= part.offset;
155            }
156        } else {
157            if range.start <= self.breaks.len() {
158                part.len = self.caret;
159                if range.start > 0 {
160                    let b = self.breaks.len() - range.start;
161                    let gi = to_usize(self.breaks[b].gi);
162                    if gi < self.glyphs.len() {
163                        part.len = self.glyphs[gi].position.0;
164                    }
165                }
166                part.len_no_space = part.len;
167            }
168            if range.end <= self.breaks.len() {
169                part.offset = self.caret;
170                if range.end == 0 {
171                    part.len_no_space = 0.0;
172                } else {
173                    let b = self.breaks.len() - range.end;
174                    let b = self.breaks[b];
175                    part.len_no_space -= b.no_space_end;
176                    if to_usize(b.gi) < self.glyphs.len() {
177                        part.offset = self.glyphs[to_usize(b.gi)].position.0;
178                    }
179                }
180                part.len -= part.offset;
181            }
182        }
183
184        part
185    }
186
187    /// Get glyph index from part index
188    pub fn to_glyph_range(&self, range: std::ops::Range<usize>) -> Range {
189        let mut start = range.start;
190        let mut end = range.end;
191
192        let rtl = self.level.is_rtl();
193        if rtl {
194            let num_parts = self.num_parts();
195            start = num_parts - start;
196            end = num_parts - end;
197        }
198
199        let map = |part: usize| {
200            if part == 0 {
201                0
202            } else if part <= self.breaks.len() {
203                to_usize(self.breaks[part - 1].gi)
204            } else {
205                debug_assert_eq!(part, self.breaks.len() + 1);
206                self.glyphs.len()
207            }
208        };
209
210        let mut start = map(start);
211        let mut end = map(end);
212
213        if rtl {
214            std::mem::swap(&mut start, &mut end);
215        }
216
217        Range::from(start..end)
218    }
219}
220
221#[derive(Clone, Copy, Debug)]
222pub(crate) struct Input<'a> {
223    /// Contiguous text
224    pub text: &'a str,
225    pub dpem: f32,
226    pub level: Level,
227    pub script: Script,
228}
229
230/// Shape a `run` of text
231///
232/// A "run" is expected to be the maximal sequence of code points of the same
233/// embedding level (as defined by Unicode TR9 aka BIDI algorithm) *and*
234/// excluding all hard line breaks (e.g. `\n`).
235pub(crate) fn shape(
236    input: Input,
237    range: Range, // range in text
238    face_id: FaceId,
239    // All soft-break locations within this run, excluding the end
240    mut breaks: TinyVec<[GlyphBreak; 4]>,
241    special: RunSpecial,
242) -> GlyphRun {
243    /*
244    print!("shape[{:?}]:\t", special);
245    let mut start = range.start();
246    for b in &breaks {
247        print!("\"{}\" ", &text[start..(b.index as usize)]);
248        start = b.index as usize;
249    }
250    println!("\"{}\"", &text[start..range.end()]);
251    */
252
253    if input.level.is_rtl() {
254        breaks.reverse();
255    }
256
257    let mut glyphs = vec![];
258    let mut no_space_end = 0.0;
259    let mut caret = 0.0;
260
261    let face = fonts::library().get_face(face_id);
262    let dpu = face.dpu(input.dpem);
263    let sf = face.scale_by_dpu(dpu);
264
265    if input.dpem >= 0.0 {
266        #[cfg(feature = "rustybuzz")]
267        let r = shape_rustybuzz(input, range, face_id, &mut breaks);
268
269        #[cfg(not(feature = "rustybuzz"))]
270        let r = shape_simple(sf, input, range, &mut breaks);
271
272        glyphs = r.0;
273        no_space_end = r.1;
274        caret = r.2;
275    }
276
277    if input.level.is_rtl() {
278        // With RTL text, no_space_end means start_no_space; recalculate
279        let mut break_i = breaks.len().wrapping_sub(1);
280        let mut start_no_space = caret;
281        let mut last_id = None;
282        let side_bearing = |id: Option<GlyphId>| id.map(|id| sf.h_side_bearing(id)).unwrap_or(0.0);
283        for (gi, glyph) in glyphs.iter().enumerate().rev() {
284            if break_i < breaks.len() && to_usize(breaks[break_i].gi) == gi {
285                assert!(gi < glyphs.len());
286                breaks[break_i].gi = to_u32(gi) + 1;
287                breaks[break_i].no_space_end = start_no_space - side_bearing(last_id);
288                break_i = break_i.wrapping_sub(1);
289            }
290            if !input.text[to_usize(glyph.index)..]
291                .chars()
292                .next()
293                .map(|c| c.is_whitespace())
294                .unwrap_or(true)
295            {
296                last_id = Some(glyph.id);
297                start_no_space = glyph.position.0;
298            }
299        }
300        no_space_end = start_no_space - side_bearing(last_id);
301    }
302
303    GlyphRun {
304        range,
305        dpem: input.dpem,
306        dpu,
307        face_id,
308        special,
309        level: input.level,
310        script: input.script,
311
312        glyphs,
313        breaks,
314        no_space_end,
315        caret,
316    }
317}
318
// Shaping via the Rustybuzz library
//
// Returns the shaped glyphs, the end position excluding trailing whitespace
// and the final caret position. For each entry of `breaks` whose index
// matches a glyph's source index (tested in order), `gi` and `no_space_end`
// are assigned.
#[cfg(feature = "rustybuzz")]
fn shape_rustybuzz(
    input: Input<'_>,
    range: Range,
    face_id: FaceId,
    breaks: &mut [GlyphBreak],
) -> (Vec<Glyph>, f32, f32) {
    let Input {
        text,
        dpem,
        level,
        script,
    } = input;

    let fonts = fonts::library();
    let store = fonts.get_face_store(face_id);
    let dpu = store.face_ref().dpu(dpem);
    let face = store.rustybuzz();

    // Note: pixels-per-em is deliberately not set on the face
    // (`face.set_pixels_per_em`): ppem affects hinting but does not scale
    // layout, so doing so would have little effect.

    let slice = &text[range];
    let idx_offset = range.start;

    let direction = if level.is_rtl() {
        rustybuzz::Direction::RightToLeft
    } else {
        rustybuzz::Direction::LeftToRight
    };

    // TODO: cache the buffer for reuse later?
    let mut buffer = rustybuzz::UnicodeBuffer::new();
    buffer.set_direction(direction);
    buffer.push_str(slice);
    let tag = ttf_parser::Tag(u32::from_be_bytes(script.0));
    if let Some(script) = rustybuzz::Script::from_iso15924_tag(tag) {
        buffer.set_script(script);
    }

    let output = rustybuzz::shape(face, &[], buffer);

    let mut glyphs = Vec::with_capacity(output.len());
    let mut caret = 0.0;
    let mut no_space_end = caret;
    let mut break_i = 0;

    let infos = output.glyph_infos().iter();
    let positions = output.glyph_positions().iter();
    for (info, pos) in infos.zip(positions) {
        // Cluster values are relative to `slice`; make them relative to `text`
        let index = idx_offset + info.cluster;
        assert!(info.glyph_id <= u16::MAX as u32, "failed to map glyph id");
        let id = GlyphId(info.glyph_id as u16);

        if let Some(brk) = breaks.get_mut(break_i).filter(|b| b.index == index) {
            brk.gi = to_u32(glyphs.len());
            brk.no_space_end = no_space_end;
            break_i += 1;
        }

        let x = caret + dpu.i32_to_px(pos.x_offset);
        let y = dpu.i32_to_px(pos.y_offset);
        glyphs.push(Glyph {
            index,
            id,
            position: Vec2(x, y),
        });

        // IIRC this is only applicable to vertical text, which we don't
        // currently support:
        debug_assert_eq!(pos.y_advance, 0);
        caret += dpu.i32_to_px(pos.x_advance);

        // The cluster index is required to address a char of `text`
        let c = text[to_usize(index)..].chars().next().unwrap();
        if !c.is_whitespace() {
            no_space_end = caret;
        }
    }

    (glyphs, no_space_end, caret)
}
412
413// Simple implementation (kerning but no shaping)
414#[cfg(not(feature = "rustybuzz"))]
415fn shape_simple(
416    sf: crate::fonts::ScaledFaceRef,
417    input: Input<'_>,
418    range: Range,
419    breaks: &mut [GlyphBreak],
420) -> (Vec<Glyph>, f32, f32) {
421    let Input { text, level, .. } = input;
422
423    use unicode_bidi_mirroring::get_mirrored;
424
425    let slice = &text[range];
426    let idx_offset = range.start;
427    let rtl = level.is_rtl();
428
429    let mut caret = 0.0;
430    let mut no_space_end = caret;
431    let mut prev_glyph_id: Option<GlyphId> = None;
432    let mut break_i = 0;
433
434    // Allocate with an over-estimate and shrink later:
435    let mut glyphs = Vec::with_capacity(slice.len());
436    let mut iter = slice.char_indices();
437    let mut next_char_index = || match rtl {
438        false => iter.next(),
439        true => iter.next_back(),
440    };
441    while let Some((index, mut c)) = next_char_index() {
442        let index = idx_offset + to_u32(index);
443        if rtl {
444            if let Some(m) = get_mirrored(c) {
445                c = m;
446            }
447        }
448        let id = sf.face().glyph_index(c);
449
450        if breaks
451            .get(break_i)
452            .map(|b| b.index == index)
453            .unwrap_or(false)
454        {
455            breaks[break_i].gi = to_u32(glyphs.len());
456            breaks[break_i].no_space_end = no_space_end;
457            break_i += 1;
458            no_space_end = caret;
459        }
460
461        if let Some(prev) = prev_glyph_id {
462            if let Some(kern) = sf.face().0.tables().kern {
463                if let Some(adv) = kern
464                    .subtables
465                    .into_iter()
466                    .filter(|st| st.horizontal && !st.variable)
467                    .find_map(|st| st.glyphs_kerning(prev.into(), id.into()))
468                {
469                    caret += sf.dpu().i16_to_px(adv);
470                }
471            }
472        }
473        prev_glyph_id = Some(id);
474
475        let position = Vec2(caret, 0.0);
476        let glyph = Glyph {
477            index,
478            id,
479            position,
480        };
481        glyphs.push(glyph);
482
483        caret += sf.h_advance(id);
484        if !c.is_whitespace() {
485            no_space_end = caret;
486        }
487    }
488
489    glyphs.shrink_to_fit();
490
491    (glyphs, no_space_end, caret)
492}