Skip to main content

text_typeset/layout/
paragraph.rs

1use std::collections::HashSet;
2use std::ops::Range;
3
4use unicode_linebreak::{BreakOpportunity, linebreaks};
5
6use crate::layout::line::{LayoutLine, PositionedRun, RunDecorations};
7use crate::shaping::run::ShapedRun;
8use crate::shaping::shaper::FontMetricsPx;
9
10/// Convert a byte offset within a UTF-8 string to a char offset.
11fn byte_offset_to_char_offset(text: &str, byte_offset: usize) -> usize {
12    text[..byte_offset.min(text.len())].chars().count()
13}
14
15/// Text alignment within a line.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
17pub enum Alignment {
18    #[default]
19    Left,
20    Right,
21    Center,
22    Justify,
23}
24
25/// Break shaped runs into lines that fit within `available_width`.
26///
27/// Strategy: shape-first-then-break.
28/// 1. The caller has already shaped the full paragraph into one or more ShapedRuns.
29/// 2. We use unicode-linebreak to find break opportunities in the original text.
30/// 3. We map break positions to glyph boundaries via cluster values.
31/// 4. Greedy line wrapping: accumulate glyph advances, break at the last
32///    allowed opportunity before exceeding the width.
33/// 5. Apply alignment per line.
34pub fn break_into_lines(
35    runs: Vec<ShapedRun>,
36    text: &str,
37    available_width: f32,
38    alignment: Alignment,
39    first_line_indent: f32,
40    metrics: &FontMetricsPx,
41) -> Vec<LayoutLine> {
42    if runs.is_empty() || text.is_empty() {
43        // Empty paragraph: produce one empty line for the block to have height
44        return vec![make_empty_line(metrics, 0..0)];
45    }
46
47    // Flatten all glyphs into a single sequence with their run association
48    let flat = flatten_runs(&runs);
49    if flat.is_empty() {
50        return vec![make_empty_line(metrics, 0..0)];
51    }
52
53    // Get break opportunities from unicode-linebreak (byte offsets in text)
54    let breaks: Vec<(usize, BreakOpportunity)> = linebreaks(text).collect();
55
56    // Build a set of allowed break positions (glyph indices)
57    let break_points: HashSet<usize> = map_breaks_to_glyph_indices(&flat, &breaks);
58
59    // Greedy line wrapping
60    let mut lines = Vec::new();
61    let mut line_start_glyph = 0usize;
62    let mut line_width = 0.0f32;
63    let mut last_break_glyph: Option<usize> = None;
64    // First line may be indented; subsequent lines use full width
65    let mut effective_width = available_width - first_line_indent;
66
67    for i in 0..flat.len() {
68        let glyph_advance = flat[i].x_advance;
69        line_width += glyph_advance;
70
71        // Check if this glyph index is a break point
72        if break_points.contains(&(i + 1)) {
73            last_break_glyph = Some(i + 1);
74        }
75
76        // Check for mandatory break
77        let is_mandatory = break_points_mandatory(&breaks, &flat, i + 1);
78
79        let exceeds_width = line_width > effective_width && line_start_glyph < i;
80
81        if is_mandatory || exceeds_width {
82            let break_at = if is_mandatory {
83                i + 1
84            } else if let Some(bp) = last_break_glyph {
85                if bp > line_start_glyph {
86                    bp
87                } else {
88                    i + 1 // emergency break -no opportunity found
89                }
90            } else {
91                i + 1 // emergency break -no break opportunities at all
92            };
93
94            let indent = if lines.is_empty() {
95                first_line_indent
96            } else {
97                0.0
98            };
99            let line = build_line(
100                &runs,
101                &flat,
102                line_start_glyph,
103                break_at,
104                metrics,
105                indent,
106                text,
107            );
108            lines.push(line);
109
110            line_start_glyph = break_at;
111            // Subsequent lines use full available width
112            effective_width = available_width;
113            // Re-accumulate width for glyphs already scanned past the break
114            line_width = 0.0;
115            for j in break_at..=i {
116                if j < flat.len() {
117                    line_width += flat[j].x_advance;
118                }
119            }
120            last_break_glyph = None;
121        }
122    }
123
124    // Remaining glyphs form the last line
125    if line_start_glyph < flat.len() {
126        let line = build_line(
127            &runs,
128            &flat,
129            line_start_glyph,
130            flat.len(),
131            metrics,
132            if lines.is_empty() {
133                first_line_indent
134            } else {
135                0.0
136            },
137            text,
138        );
139        lines.push(line);
140    }
141
142    // Apply alignment
143    let effective_width = available_width;
144    let last_idx = lines.len().saturating_sub(1);
145    for (i, line) in lines.iter_mut().enumerate() {
146        let indent = if i == 0 { first_line_indent } else { 0.0 };
147        let line_avail = effective_width - indent;
148        match alignment {
149            Alignment::Left => {} // runs already at x=0 (plus indent)
150            Alignment::Right => {
151                let shift = (line_avail - line.width).max(0.0);
152                for run in &mut line.runs {
153                    run.x += shift;
154                }
155            }
156            Alignment::Center => {
157                let shift = ((line_avail - line.width) / 2.0).max(0.0);
158                for run in &mut line.runs {
159                    run.x += shift;
160                }
161            }
162            Alignment::Justify => {
163                // Don't justify the last line
164                if i < last_idx && line.width > 0.0 {
165                    justify_line(line, line_avail, text);
166                }
167            }
168        }
169    }
170
171    if lines.is_empty() {
172        lines.push(make_empty_line(metrics, 0..0));
173    }
174
175    lines
176}
177
178/// A flattened glyph with enough info to map back to runs.
179struct FlatGlyph {
180    x_advance: f32,
181    cluster: u32,
182    run_index: usize,
183    glyph_index_in_run: usize,
184}
185
186fn flatten_runs(runs: &[ShapedRun]) -> Vec<FlatGlyph> {
187    let mut flat = Vec::new();
188    for (run_idx, run) in runs.iter().enumerate() {
189        // Offset cluster values from fragment-text space to block-text space.
190        // rustybuzz assigns clusters as byte offsets within the fragment text (0-based),
191        // but unicode-linebreak returns byte offsets in the full block text.
192        let cluster_offset = run.text_range.start as u32;
193        for (glyph_idx, glyph) in run.glyphs.iter().enumerate() {
194            flat.push(FlatGlyph {
195                x_advance: glyph.x_advance,
196                cluster: glyph.cluster + cluster_offset,
197                run_index: run_idx,
198                glyph_index_in_run: glyph_idx,
199            });
200        }
201    }
202    flat
203}
204
205/// Map unicode-linebreak byte offsets to glyph indices.
206/// A break at byte offset B maps to the glyph whose cluster >= B.
207fn map_breaks_to_glyph_indices(
208    flat: &[FlatGlyph],
209    breaks: &[(usize, BreakOpportunity)],
210) -> HashSet<usize> {
211    let mut result = HashSet::new();
212    for &(byte_offset, _opportunity) in breaks {
213        // Find the first glyph whose cluster starts at or after byte_offset
214        if let Some(glyph_idx) = flat.iter().position(|g| g.cluster as usize >= byte_offset) {
215            result.insert(glyph_idx);
216        } else {
217            // Break is at or past end of text
218            result.insert(flat.len());
219        }
220    }
221    result
222}
223
224/// Check if glyph index `glyph_idx` corresponds to a mandatory break.
225fn break_points_mandatory(
226    breaks: &[(usize, BreakOpportunity)],
227    flat: &[FlatGlyph],
228    glyph_idx: usize,
229) -> bool {
230    if glyph_idx == 0 || glyph_idx > flat.len() {
231        return false;
232    }
233
234    // Find the byte offset for this glyph boundary
235    let byte_offset = if glyph_idx < flat.len() {
236        flat[glyph_idx].cluster as usize
237    } else {
238        // Past last glyph -check if there's a mandatory break at the end
239        return false;
240    };
241
242    breaks
243        .iter()
244        .any(|&(bo, op)| bo == byte_offset && op == BreakOpportunity::Mandatory)
245}
246
247/// Build a LayoutLine from a glyph range within the flat sequence.
248fn build_line(
249    runs: &[ShapedRun],
250    flat: &[FlatGlyph],
251    start: usize,
252    end: usize,
253    metrics: &FontMetricsPx,
254    indent: f32,
255    text: &str,
256) -> LayoutLine {
257    // Group consecutive glyphs by run_index to reconstruct PositionedRuns
258    let mut positioned_runs = Vec::new();
259    let mut x = indent;
260    let mut current_run_idx: Option<usize> = None;
261    let mut run_glyph_start = 0usize;
262
263    for i in start..end {
264        let fg = &flat[i];
265        if current_run_idx != Some(fg.run_index) {
266            // Emit previous run segment if any
267            if let Some(prev_run_idx) = current_run_idx {
268                // End of previous run: use the last glyph we saw from that run
269                let prev_end = if i > start {
270                    flat[i - 1].glyph_index_in_run + 1
271                } else {
272                    run_glyph_start
273                };
274                let sub_run = extract_sub_run(runs, prev_run_idx, run_glyph_start, prev_end);
275                if let Some((pr, advance)) = sub_run {
276                    positioned_runs.push(PositionedRun {
277                        decorations: RunDecorations {
278                            underline: pr.underline,
279                            overline: pr.overline,
280                            strikeout: pr.strikeout,
281                            is_link: pr.is_link,
282                        },
283                        shaped_run: pr,
284                        x,
285                    });
286                    x += advance;
287                }
288            }
289            current_run_idx = Some(fg.run_index);
290            run_glyph_start = fg.glyph_index_in_run;
291        }
292    }
293
294    // Emit final run segment
295    if let Some(run_idx) = current_run_idx {
296        let end_in_run = if end < flat.len() && flat[end].run_index == run_idx {
297            flat[end].glyph_index_in_run
298        } else if end > start {
299            flat[end - 1].glyph_index_in_run + 1
300        } else {
301            run_glyph_start
302        };
303        let sub_run = extract_sub_run(runs, run_idx, run_glyph_start, end_in_run);
304        if let Some((pr, advance)) = sub_run {
305            positioned_runs.push(PositionedRun {
306                decorations: RunDecorations {
307                    underline: pr.underline,
308                    overline: pr.overline,
309                    strikeout: pr.strikeout,
310                    is_link: pr.is_link,
311                },
312                shaped_run: pr,
313                x,
314            });
315            x += advance;
316        }
317    }
318
319    let width = x - indent;
320
321    // Compute char range from cluster values.
322    // Clusters from rustybuzz are byte offsets — convert to char offsets
323    // so that positions match text-document's character-based coordinates.
324    let byte_start = if start < flat.len() {
325        flat[start].cluster as usize
326    } else {
327        0
328    };
329    let byte_end = if end > 0 && end <= flat.len() {
330        if end < flat.len() {
331            flat[end].cluster as usize
332        } else {
333            let byte_offset = flat[end - 1].cluster as usize;
334            let char_len = text
335                .get(byte_offset..)
336                .and_then(|s| s.chars().next())
337                .map(|c| c.len_utf8())
338                .unwrap_or(1);
339            byte_offset + char_len
340        }
341    } else {
342        byte_start
343    };
344    let char_start = byte_offset_to_char_offset(text, byte_start);
345    let char_end = byte_offset_to_char_offset(text, byte_end);
346
347    // Convert glyph cluster values from byte offsets to char offsets
348    for run in &mut positioned_runs {
349        for glyph in &mut run.shaped_run.glyphs {
350            glyph.cluster = byte_offset_to_char_offset(text, glyph.cluster as usize) as u32;
351        }
352    }
353
354    let line_height = metrics.ascent + metrics.descent + metrics.leading;
355
356    LayoutLine {
357        runs: positioned_runs,
358        y: 0.0, // will be set by the caller (block layout)
359        ascent: metrics.ascent,
360        descent: metrics.descent,
361        leading: metrics.leading,
362        width,
363        char_range: char_start..char_end,
364        line_height,
365    }
366}
367
368/// Extract a sub-run (slice of glyphs) from a ShapedRun.
369/// Cluster values are offset to block-text space (adding text_range.start).
370fn extract_sub_run(
371    runs: &[ShapedRun],
372    run_index: usize,
373    glyph_start: usize,
374    glyph_end: usize,
375) -> Option<(ShapedRun, f32)> {
376    let run = &runs[run_index];
377    let end = glyph_end.min(run.glyphs.len());
378    if glyph_start >= end {
379        return None;
380    }
381    let cluster_offset = run.text_range.start as u32;
382    let mut sub_glyphs = run.glyphs[glyph_start..end].to_vec();
383    // Offset cluster values from fragment-local to block-text space
384    for g in &mut sub_glyphs {
385        g.cluster += cluster_offset;
386    }
387    let advance: f32 = sub_glyphs.iter().map(|g| g.x_advance).sum();
388
389    let sub_run = ShapedRun {
390        font_face_id: run.font_face_id,
391        size_px: run.size_px,
392        glyphs: sub_glyphs,
393        advance_width: advance,
394        text_range: run.text_range.clone(),
395        underline: run.underline,
396        overline: run.overline,
397        strikeout: run.strikeout,
398        is_link: run.is_link,
399    };
400    Some((sub_run, advance))
401}
402
403fn make_empty_line(metrics: &FontMetricsPx, char_range: Range<usize>) -> LayoutLine {
404    LayoutLine {
405        runs: Vec::new(),
406        y: 0.0,
407        ascent: metrics.ascent,
408        descent: metrics.descent,
409        leading: metrics.leading,
410        width: 0.0,
411        char_range,
412        line_height: metrics.ascent + metrics.descent + metrics.leading,
413    }
414}
415
416/// Distribute extra space among word gaps for justification.
417///
418/// Finds space glyphs (cluster mapping to ' ') across all runs and
419/// increases their x_advance proportionally. Then recomputes run x positions.
420fn justify_line(line: &mut LayoutLine, target_width: f32, text: &str) {
421    let extra = target_width - line.width;
422    if extra <= 0.0 {
423        return;
424    }
425
426    // Count space glyphs across all runs
427    let mut space_count = 0usize;
428    for run in &line.runs {
429        for glyph in &run.shaped_run.glyphs {
430            let byte_offset = glyph.cluster as usize;
431            if let Some(ch) = text.get(byte_offset..).and_then(|s| s.chars().next())
432                && ch == ' '
433            {
434                space_count += 1;
435            }
436        }
437    }
438
439    if space_count == 0 {
440        return;
441    }
442
443    let extra_per_space = extra / space_count as f32;
444
445    // Increase x_advance of space glyphs
446    for run in &mut line.runs {
447        for glyph in &mut run.shaped_run.glyphs {
448            let byte_offset = glyph.cluster as usize;
449            if let Some(ch) = text.get(byte_offset..).and_then(|s| s.chars().next())
450                && ch == ' '
451            {
452                glyph.x_advance += extra_per_space;
453            }
454        }
455        // Recompute run advance width
456        run.shaped_run.advance_width = run.shaped_run.glyphs.iter().map(|g| g.x_advance).sum();
457    }
458
459    // Recompute run x positions (runs follow each other)
460    let first_x = line.runs.first().map(|r| r.x).unwrap_or(0.0);
461    let mut x = first_x;
462    for run in &mut line.runs {
463        run.x = x;
464        x += run.shaped_run.advance_width;
465    }
466
467    line.width = target_width;
468}