forme/text/shaping.rs
1//! # OpenType Shaping
2//!
3//! Wraps rustybuzz to perform OpenType shaping (GSUB/GPOS) on text.
4//! This produces real glyph IDs, kerning offsets, and ligature substitutions
5//! instead of the naive char-as-u16 glyph IDs used before.
6//!
7//! Standard PDF fonts (Helvetica, Times, Courier) bypass shaping entirely —
8//! they use WinAnsi encoding and don't have GSUB/GPOS tables.
9
/// A single positioned glyph produced by OpenType shaping.
///
/// Advances and offsets are expressed in font units; scale them by
/// `font_size / units_per_em` to obtain points.
///
/// Implements `PartialEq`/`Eq` so shaped output can be compared directly
/// (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ShapedGlyph {
    /// Real glyph ID from GSUB (not a Unicode codepoint).
    pub glyph_id: u16,
    /// Index of the first character in the input text that maps to this glyph.
    /// Multiple chars may map to one glyph (ligatures), or one char may
    /// produce multiple glyphs (decomposition).
    pub cluster: u32,
    /// Horizontal advance in font units.
    pub x_advance: i32,
    /// Vertical advance in font units (usually 0 for horizontal text).
    pub y_advance: i32,
    /// Horizontal offset from GPOS (kerning, mark positioning).
    pub x_offset: i32,
    /// Vertical offset from GPOS.
    pub y_offset: i32,
}
28
29/// Shape text using the given font data.
30///
31/// Returns `None` if the font data can't be parsed. For standard fonts
32/// (no font data), callers should skip shaping entirely.
33pub fn shape_text(text: &str, font_data: &[u8]) -> Option<Vec<ShapedGlyph>> {
34 shape_text_with_direction(text, font_data, false)
35}
36
37/// Shape text with explicit direction control.
38///
39/// When `is_rtl` is true, the shaper applies RTL contextual forms (e.g.,
40/// Arabic initial/medial/final forms) and produces glyphs in visual order.
41pub fn shape_text_with_direction(
42 text: &str,
43 font_data: &[u8],
44 is_rtl: bool,
45) -> Option<Vec<ShapedGlyph>> {
46 let face = rustybuzz::Face::from_slice(font_data, 0)?;
47 let mut buffer = rustybuzz::UnicodeBuffer::new();
48 buffer.push_str(text);
49 if is_rtl {
50 buffer.set_direction(rustybuzz::Direction::RightToLeft);
51 }
52
53 let output = rustybuzz::shape(&face, &[], buffer);
54
55 let infos = output.glyph_infos();
56 let positions = output.glyph_positions();
57
58 // rustybuzz returns byte offsets in cluster values, but downstream code
59 // indexes into char arrays. Convert byte offsets to char indices.
60 let byte_to_char: std::collections::HashMap<usize, usize> = text
61 .char_indices()
62 .enumerate()
63 .map(|(ci, (bi, _))| (bi, ci))
64 .collect();
65
66 let glyphs = infos
67 .iter()
68 .zip(positions.iter())
69 .map(|(info, pos)| {
70 let char_idx = byte_to_char
71 .get(&(info.cluster as usize))
72 .copied()
73 .unwrap_or(0);
74 ShapedGlyph {
75 glyph_id: info.glyph_id as u16,
76 cluster: char_idx as u32,
77 x_advance: pos.x_advance,
78 y_advance: pos.y_advance,
79 x_offset: pos.x_offset,
80 y_offset: pos.y_offset,
81 }
82 })
83 .collect();
84
85 Some(glyphs)
86}
87
88/// Shape a segment of text for a specific font, returning shaped glyphs
89/// and a mapping from glyph index to character range.
90///
91/// `char_offset` is added to cluster values so they reference positions
92/// in a larger text buffer (useful for multi-run shaping).
93pub fn shape_text_with_offset(
94 text: &str,
95 font_data: &[u8],
96 char_offset: u32,
97) -> Option<Vec<ShapedGlyph>> {
98 let mut glyphs = shape_text(text, font_data)?;
99 for g in &mut glyphs {
100 g.cluster += char_offset;
101 }
102 Some(glyphs)
103}
104
105/// Compute the total advance width of shaped glyphs in points.
106pub fn shaped_width(glyphs: &[ShapedGlyph], units_per_em: u16, font_size: f64) -> f64 {
107 let scale = font_size / units_per_em as f64;
108 glyphs.iter().map(|g| g.x_advance as f64 * scale).sum()
109}
110
111/// Compute per-cluster widths from shaped glyphs.
112///
113/// Returns a Vec where index i is the width contributed by the glyph(s)
114/// whose cluster value is i. This accounts for ligatures (one glyph for
115/// multiple chars) and decomposition (multiple glyphs for one char).
116///
117/// `num_chars` is the total number of characters in the input text.
118pub fn cluster_widths(
119 glyphs: &[ShapedGlyph],
120 num_chars: usize,
121 units_per_em: u16,
122 font_size: f64,
123 letter_spacing: f64,
124) -> Vec<f64> {
125 let scale = font_size / units_per_em as f64;
126 let mut widths = vec![0.0_f64; num_chars];
127
128 for glyph in glyphs {
129 let cluster = glyph.cluster as usize;
130 if cluster < num_chars {
131 widths[cluster] += glyph.x_advance as f64 * scale + letter_spacing;
132 }
133 }
134
135 // For ligatures, the first char of the cluster gets the full width,
136 // and subsequent chars in the same cluster get zero width.
137 // We need to identify cluster ranges and zero out non-first chars.
138 if !glyphs.is_empty() {
139 let mut cluster_starts: Vec<bool> = vec![false; num_chars];
140 for glyph in glyphs {
141 let c = glyph.cluster as usize;
142 if c < num_chars {
143 cluster_starts[c] = true;
144 }
145 }
146
147 // Walk through chars: if a char's cluster_starts is false and the
148 // previous char's cluster value would encompass it (ligature), its
149 // width should be 0 (the first char of the cluster already has it).
150 // We detect this by checking: chars not in cluster_starts that follow
151 // a cluster_start should have width 0.
152 let mut in_ligature = false;
153 for i in 0..num_chars {
154 if cluster_starts[i] {
155 in_ligature = false;
156 } else if i > 0 {
157 // This char wasn't the start of any glyph cluster.
158 // It's part of a ligature with the previous cluster.
159 in_ligature = true;
160 }
161 if in_ligature {
162 widths[i] = 0.0;
163 }
164 }
165 }
166
167 widths
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a glyph with the given id, cluster and horizontal advance;
    /// every other metric is zero.
    fn glyph(glyph_id: u16, cluster: u32, x_advance: i32) -> ShapedGlyph {
        ShapedGlyph {
            glyph_id,
            cluster,
            x_advance,
            y_advance: 0,
            x_offset: 0,
            y_offset: 0,
        }
    }

    /// Assert that `actual` is within 0.001 of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < 0.001);
    }

    // A minimal real font can't easily be created inside a test, so the
    // shaping entry points are only exercised on the invalid-font path,
    // where they must return `None`.
    #[test]
    fn test_shape_text_invalid_font() {
        assert!(shape_text("Hello", &[0, 1, 2, 3]).is_none());
    }

    #[test]
    fn test_shape_text_empty() {
        assert!(shape_text("", &[0, 1, 2, 3]).is_none());
    }

    #[test]
    fn test_shaped_width_empty() {
        assert_eq!(shaped_width(&[], 1000, 12.0), 0.0);
    }

    #[test]
    fn test_cluster_widths_empty() {
        assert!(cluster_widths(&[], 0, 1000, 12.0, 0.0).is_empty());
    }

    #[test]
    fn test_cluster_widths_basic() {
        // Three glyphs for three chars, each in its own cluster.
        let glyphs = vec![glyph(1, 0, 500), glyph(2, 1, 600), glyph(3, 2, 500)];

        let widths = cluster_widths(&glyphs, 3, 1000, 10.0, 0.0);

        assert_eq!(widths.len(), 3);
        assert_close(widths[0], 5.0); // 500/1000 * 10
        assert_close(widths[1], 6.0); // 600/1000 * 10
        assert_close(widths[2], 5.0);
    }

    #[test]
    fn test_cluster_widths_ligature() {
        // A ligature: one glyph for two chars (cluster 0 covers chars 0 and 1).
        let glyphs = vec![glyph(100, 0, 800), glyph(3, 2, 500)];

        let widths = cluster_widths(&glyphs, 3, 1000, 10.0, 0.0);

        assert_eq!(widths.len(), 3);
        assert_close(widths[0], 8.0); // Ligature gets the full width
        assert_close(widths[1], 0.0); // Second char of the ligature = 0
        assert_close(widths[2], 5.0);
    }
}
263}