Skip to main content

forme/text/
shaping.rs

1//! # OpenType Shaping
2//!
3//! Wraps rustybuzz to perform OpenType shaping (GSUB/GPOS) on text.
4//! This produces real glyph IDs, kerning offsets, and ligature substitutions
5//! instead of the naive char-as-u16 glyph IDs used before.
6//!
7//! Standard PDF fonts (Helvetica, Times, Courier) bypass shaping entirely —
8//! they use WinAnsi encoding and don't have GSUB/GPOS tables.
9
/// A single glyph produced by OpenType shaping.
///
/// All metrics are expressed in font design units; scale them by
/// `font_size / units_per_em` to obtain points.
///
/// The type is plain data, so it can be compared with `==` (handy in tests)
/// and built from `Default` with struct-update syntax.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ShapedGlyph {
    /// Real glyph ID from GSUB (not a Unicode codepoint).
    pub glyph_id: u16,
    /// Index of the first character in the input text that maps to this glyph.
    /// Multiple chars may map to one glyph (ligatures), or one char may
    /// produce multiple glyphs (decomposition).
    pub cluster: u32,
    /// Horizontal advance in font units.
    pub x_advance: i32,
    /// Vertical advance in font units (usually 0 for horizontal text).
    pub y_advance: i32,
    /// Horizontal offset from GPOS (kerning, mark positioning).
    pub x_offset: i32,
    /// Vertical offset from GPOS.
    pub y_offset: i32,
}
28
29/// Shape text using the given font data.
30///
31/// Returns `None` if the font data can't be parsed. For standard fonts
32/// (no font data), callers should skip shaping entirely.
33pub fn shape_text(text: &str, font_data: &[u8]) -> Option<Vec<ShapedGlyph>> {
34    shape_text_with_direction(text, font_data, false)
35}
36
37/// Shape text with explicit direction control.
38///
39/// When `is_rtl` is true, the shaper applies RTL contextual forms (e.g.,
40/// Arabic initial/medial/final forms) and produces glyphs in visual order.
41pub fn shape_text_with_direction(
42    text: &str,
43    font_data: &[u8],
44    is_rtl: bool,
45) -> Option<Vec<ShapedGlyph>> {
46    let face = rustybuzz::Face::from_slice(font_data, 0)?;
47    let mut buffer = rustybuzz::UnicodeBuffer::new();
48    buffer.push_str(text);
49    if is_rtl {
50        buffer.set_direction(rustybuzz::Direction::RightToLeft);
51    }
52
53    let output = rustybuzz::shape(&face, &[], buffer);
54
55    let infos = output.glyph_infos();
56    let positions = output.glyph_positions();
57
58    // rustybuzz returns byte offsets in cluster values, but downstream code
59    // indexes into char arrays. Convert byte offsets to char indices.
60    let byte_to_char: std::collections::HashMap<usize, usize> = text
61        .char_indices()
62        .enumerate()
63        .map(|(ci, (bi, _))| (bi, ci))
64        .collect();
65
66    let glyphs = infos
67        .iter()
68        .zip(positions.iter())
69        .map(|(info, pos)| {
70            let char_idx = byte_to_char
71                .get(&(info.cluster as usize))
72                .copied()
73                .unwrap_or(0);
74            ShapedGlyph {
75                glyph_id: info.glyph_id as u16,
76                cluster: char_idx as u32,
77                x_advance: pos.x_advance,
78                y_advance: pos.y_advance,
79                x_offset: pos.x_offset,
80                y_offset: pos.y_offset,
81            }
82        })
83        .collect();
84
85    Some(glyphs)
86}
87
88/// Shape a segment of text for a specific font, returning shaped glyphs
89/// and a mapping from glyph index to character range.
90///
91/// `char_offset` is added to cluster values so they reference positions
92/// in a larger text buffer (useful for multi-run shaping).
93pub fn shape_text_with_offset(
94    text: &str,
95    font_data: &[u8],
96    char_offset: u32,
97) -> Option<Vec<ShapedGlyph>> {
98    let mut glyphs = shape_text(text, font_data)?;
99    for g in &mut glyphs {
100        g.cluster += char_offset;
101    }
102    Some(glyphs)
103}
104
105/// Compute the total advance width of shaped glyphs in points.
106pub fn shaped_width(glyphs: &[ShapedGlyph], units_per_em: u16, font_size: f64) -> f64 {
107    let scale = font_size / units_per_em as f64;
108    glyphs.iter().map(|g| g.x_advance as f64 * scale).sum()
109}
110
111/// Compute per-cluster widths from shaped glyphs.
112///
113/// Returns a Vec where index i is the width contributed by the glyph(s)
114/// whose cluster value is i. This accounts for ligatures (one glyph for
115/// multiple chars) and decomposition (multiple glyphs for one char).
116///
117/// `num_chars` is the total number of characters in the input text.
118pub fn cluster_widths(
119    glyphs: &[ShapedGlyph],
120    num_chars: usize,
121    units_per_em: u16,
122    font_size: f64,
123    letter_spacing: f64,
124) -> Vec<f64> {
125    let scale = font_size / units_per_em as f64;
126    let mut widths = vec![0.0_f64; num_chars];
127
128    for glyph in glyphs {
129        let cluster = glyph.cluster as usize;
130        if cluster < num_chars {
131            widths[cluster] += glyph.x_advance as f64 * scale + letter_spacing;
132        }
133    }
134
135    // For ligatures, the first char of the cluster gets the full width,
136    // and subsequent chars in the same cluster get zero width.
137    // We need to identify cluster ranges and zero out non-first chars.
138    if !glyphs.is_empty() {
139        let mut cluster_starts: Vec<bool> = vec![false; num_chars];
140        for glyph in glyphs {
141            let c = glyph.cluster as usize;
142            if c < num_chars {
143                cluster_starts[c] = true;
144            }
145        }
146
147        // Walk through chars: if a char's cluster_starts is false and the
148        // previous char's cluster value would encompass it (ligature), its
149        // width should be 0 (the first char of the cluster already has it).
150        // We detect this by checking: chars not in cluster_starts that follow
151        // a cluster_start should have width 0.
152        let mut in_ligature = false;
153        for i in 0..num_chars {
154            if cluster_starts[i] {
155                in_ligature = false;
156            } else if i > 0 {
157                // This char wasn't the start of any glyph cluster.
158                // It's part of a ligature with the previous cluster.
159                in_ligature = true;
160            }
161            if in_ligature {
162                widths[i] = 0.0;
163            }
164        }
165    }
166
167    widths
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a glyph with the given id, cluster and horizontal advance;
    /// every other metric is zero.
    fn glyph(glyph_id: u16, cluster: u32, x_advance: i32) -> ShapedGlyph {
        ShapedGlyph {
            glyph_id,
            cluster,
            x_advance,
            y_advance: 0,
            x_offset: 0,
            y_offset: 0,
        }
    }

    /// Asserts that `actual` is within 0.001 of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < 0.001);
    }

    // A minimal font can't easily be created inside the tests, so the shaping
    // entry points are only exercised on the "font data can't be parsed" path.
    #[test]
    fn test_shape_text_invalid_font() {
        assert!(shape_text("Hello", &[0, 1, 2, 3]).is_none());
    }

    // Empty text with the same unparseable font bytes: still `None`.
    #[test]
    fn test_shape_text_empty() {
        assert!(shape_text("", &[0, 1, 2, 3]).is_none());
    }

    #[test]
    fn test_shaped_width_empty() {
        assert_eq!(shaped_width(&[], 1000, 12.0), 0.0);
    }

    #[test]
    fn test_cluster_widths_empty() {
        assert!(cluster_widths(&[], 0, 1000, 12.0, 0.0).is_empty());
    }

    #[test]
    fn test_cluster_widths_basic() {
        // Three glyphs for three chars, each in its own cluster.
        let glyphs = [glyph(1, 0, 500), glyph(2, 1, 600), glyph(3, 2, 500)];

        let widths = cluster_widths(&glyphs, 3, 1000, 10.0, 0.0);

        assert_eq!(widths.len(), 3);
        assert_close(widths[0], 5.0); // 500/1000 * 10
        assert_close(widths[1], 6.0); // 600/1000 * 10
        assert_close(widths[2], 5.0);
    }

    #[test]
    fn test_cluster_widths_ligature() {
        // One ligature glyph for two chars (cluster 0 covers chars 0 and 1),
        // followed by an ordinary glyph for char 2.
        let glyphs = [glyph(100, 0, 800), glyph(3, 2, 500)];

        let widths = cluster_widths(&glyphs, 3, 1000, 10.0, 0.0);

        assert_eq!(widths.len(), 3);
        assert_close(widths[0], 8.0); // Ligature gets full width
        assert_close(widths[1], 0.0); // Second char of ligature = 0
        assert_close(widths[2], 5.0);
    }
}