Skip to main content

text_typeset/layout/
paragraph.rs

1use std::collections::HashSet;
2use std::ops::Range;
3
4use unicode_linebreak::{BreakOpportunity, linebreaks};
5
6use crate::layout::line::{LayoutLine, PositionedRun, RunDecorations};
7use crate::shaping::run::ShapedRun;
8use crate::shaping::shaper::FontMetricsPx;
9
10/// Convert a byte offset within a UTF-8 string to a char offset.
11///
12/// Clamps to `text.len()` and rounds down to the nearest char boundary
13/// if `byte_offset` lands inside a multi-byte character. HarfBuzz can
14/// emit cluster values that don't coincide with UTF-8 char boundaries
15/// (ligature splits, fallback shaping), so callers must never assume
16/// cluster values are well-aligned.
17fn byte_offset_to_char_offset(text: &str, byte_offset: usize) -> usize {
18    let mut off = byte_offset.min(text.len());
19    while off > 0 && !text.is_char_boundary(off) {
20        off -= 1;
21    }
22    text[..off].chars().count()
23}
24
25/// Text alignment within a line.
26#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
27pub enum Alignment {
28    #[default]
29    Left,
30    Right,
31    Center,
32    Justify,
33}
34
35/// Break shaped runs into lines that fit within `available_width`.
36///
37/// Strategy: shape-first-then-break.
38/// 1. The caller has already shaped the full paragraph into one or more ShapedRuns.
39/// 2. We use unicode-linebreak to find break opportunities in the original text.
40/// 3. We map break positions to glyph boundaries via cluster values.
41/// 4. Greedy line wrapping: accumulate glyph advances, break at the last
42///    allowed opportunity before exceeding the width.
43/// 5. Apply alignment per line.
44pub fn break_into_lines(
45    runs: Vec<ShapedRun>,
46    text: &str,
47    available_width: f32,
48    alignment: Alignment,
49    first_line_indent: f32,
50    metrics: &FontMetricsPx,
51) -> Vec<LayoutLine> {
52    if runs.is_empty() || text.is_empty() {
53        // Empty paragraph: produce one empty line for the block to have height
54        return vec![make_empty_line(metrics, 0..0)];
55    }
56
57    // Flatten all glyphs into a single sequence with their run association
58    let flat = flatten_runs(&runs);
59    if flat.is_empty() {
60        return vec![make_empty_line(metrics, 0..0)];
61    }
62
63    // Get break opportunities from unicode-linebreak (byte offsets in text)
64    let breaks: Vec<(usize, BreakOpportunity)> = linebreaks(text).collect();
65
66    // Build sets of allowed and mandatory break positions (glyph indices)
67    let (break_points, mandatory_breaks) = map_breaks_to_glyph_indices(&flat, &breaks);
68
69    // Greedy line wrapping
70    let mut lines = Vec::new();
71    let mut line_start_glyph = 0usize;
72    let mut line_width = 0.0f32;
73    let mut last_break_glyph: Option<usize> = None;
74    // First line may be indented; subsequent lines use full width
75    let mut effective_width = available_width - first_line_indent;
76
77    for i in 0..flat.len() {
78        let glyph_advance = flat[i].x_advance;
79        line_width += glyph_advance;
80
81        // Check for mandatory break — O(1) HashSet lookup
82        let is_mandatory = mandatory_breaks.contains(&(i + 1));
83
84        let exceeds_width = line_width > effective_width && line_start_glyph < i;
85
86        if is_mandatory || exceeds_width {
87            let break_at = if is_mandatory {
88                i + 1
89            } else if let Some(bp) = last_break_glyph {
90                if bp > line_start_glyph {
91                    bp
92                } else {
93                    i + 1 // emergency break -no opportunity found
94                }
95            } else {
96                i + 1 // emergency break -no break opportunities at all
97            };
98
99            let indent = if lines.is_empty() {
100                first_line_indent
101            } else {
102                0.0
103            };
104            let line = build_line(
105                &runs,
106                &flat,
107                line_start_glyph,
108                break_at,
109                metrics,
110                indent,
111                text,
112            );
113            lines.push(line);
114
115            line_start_glyph = break_at;
116            // Subsequent lines use full available width
117            effective_width = available_width;
118            // Re-accumulate width for glyphs already scanned past the break
119            line_width = 0.0;
120            for j in break_at..=i {
121                if j < flat.len() {
122                    line_width += flat[j].x_advance;
123                }
124            }
125            last_break_glyph = None;
126        }
127
128        // Update break point AFTER the width check so that a break opportunity
129        // discovered at this glyph does not clobber the previous one when the
130        // width is already exceeded. This prevents the end-of-text mandatory
131        // break from keeping the last glyph on an overflowing line.
132        if break_points.contains(&(i + 1)) {
133            last_break_glyph = Some(i + 1);
134        }
135    }
136
137    // Remaining glyphs form the last line
138    if line_start_glyph < flat.len() {
139        let line = build_line(
140            &runs,
141            &flat,
142            line_start_glyph,
143            flat.len(),
144            metrics,
145            if lines.is_empty() {
146                first_line_indent
147            } else {
148                0.0
149            },
150            text,
151        );
152        lines.push(line);
153    }
154
155    // Apply alignment
156    let effective_width = available_width;
157    let last_idx = lines.len().saturating_sub(1);
158    for (i, line) in lines.iter_mut().enumerate() {
159        let indent = if i == 0 { first_line_indent } else { 0.0 };
160        let line_avail = effective_width - indent;
161        match alignment {
162            Alignment::Left => {} // runs already at x=0 (plus indent)
163            Alignment::Right => {
164                let shift = (line_avail - line.width).max(0.0);
165                for run in &mut line.runs {
166                    run.x += shift;
167                }
168            }
169            Alignment::Center => {
170                let shift = ((line_avail - line.width) / 2.0).max(0.0);
171                for run in &mut line.runs {
172                    run.x += shift;
173                }
174            }
175            Alignment::Justify => {
176                // Don't justify the last line
177                if i < last_idx && line.width > 0.0 {
178                    justify_line(line, line_avail, text);
179                }
180            }
181        }
182    }
183
184    if lines.is_empty() {
185        lines.push(make_empty_line(metrics, 0..0));
186    }
187
188    // Convert glyph cluster values from byte offsets to char offsets.
189    // This must happen AFTER alignment because justify_line needs byte
190    // offsets to find space characters in the original text.
191    for line in &mut lines {
192        for run in &mut line.runs {
193            for glyph in &mut run.shaped_run.glyphs {
194                glyph.cluster = byte_offset_to_char_offset(text, glyph.cluster as usize) as u32;
195            }
196        }
197    }
198
199    lines
200}
201
202/// A flattened glyph with enough info to map back to runs.
203struct FlatGlyph {
204    x_advance: f32,
205    cluster: u32,
206    run_index: usize,
207    glyph_index_in_run: usize,
208}
209
210fn flatten_runs(runs: &[ShapedRun]) -> Vec<FlatGlyph> {
211    let mut flat = Vec::new();
212    for (run_idx, run) in runs.iter().enumerate() {
213        // Offset cluster values from fragment-text space to block-text space.
214        // rustybuzz assigns clusters as byte offsets within the fragment text (0-based),
215        // but unicode-linebreak returns byte offsets in the full block text.
216        let cluster_offset = run.text_range.start as u32;
217        for (glyph_idx, glyph) in run.glyphs.iter().enumerate() {
218            flat.push(FlatGlyph {
219                x_advance: glyph.x_advance,
220                cluster: glyph.cluster + cluster_offset,
221                run_index: run_idx,
222                glyph_index_in_run: glyph_idx,
223            });
224        }
225    }
226    flat
227}
228
229/// Map unicode-linebreak byte offsets to glyph indices using a merged walk.
230/// Both `flat` (by cluster) and `breaks` (by byte offset) are sorted,
231/// so a single O(b + m) pass replaces the previous O(b × m) approach.
232///
233/// Returns (break_points: HashSet<glyph_idx>, mandatory_breaks: HashSet<glyph_idx>).
234fn map_breaks_to_glyph_indices(
235    flat: &[FlatGlyph],
236    breaks: &[(usize, BreakOpportunity)],
237) -> (HashSet<usize>, HashSet<usize>) {
238    let mut break_points = HashSet::new();
239    let mut mandatory_breaks = HashSet::new();
240    let mut glyph_cursor = 0usize;
241
242    for &(byte_offset, opportunity) in breaks {
243        // Advance glyph cursor to the first glyph whose cluster >= byte_offset
244        while glyph_cursor < flat.len() && (flat[glyph_cursor].cluster as usize) < byte_offset {
245            glyph_cursor += 1;
246        }
247        let glyph_idx = if glyph_cursor < flat.len() {
248            glyph_cursor
249        } else {
250            flat.len()
251        };
252        break_points.insert(glyph_idx);
253        if opportunity == BreakOpportunity::Mandatory {
254            mandatory_breaks.insert(glyph_idx);
255        }
256    }
257
258    (break_points, mandatory_breaks)
259}
260
261/// Build a LayoutLine from a glyph range within the flat sequence.
262fn build_line(
263    runs: &[ShapedRun],
264    flat: &[FlatGlyph],
265    start: usize,
266    end: usize,
267    metrics: &FontMetricsPx,
268    indent: f32,
269    text: &str,
270) -> LayoutLine {
271    // Group consecutive glyphs by run_index to reconstruct PositionedRuns
272    let mut positioned_runs = Vec::new();
273    let mut x = indent;
274    let mut current_run_idx: Option<usize> = None;
275    let mut run_glyph_start = 0usize;
276
277    for i in start..end {
278        let fg = &flat[i];
279        if current_run_idx != Some(fg.run_index) {
280            // Emit previous run segment if any
281            if let Some(prev_run_idx) = current_run_idx {
282                // End of previous run: use the last glyph we saw from that run
283                let prev_end = if i > start {
284                    flat[i - 1].glyph_index_in_run + 1
285                } else {
286                    run_glyph_start
287                };
288                let sub_run = extract_sub_run(runs, prev_run_idx, run_glyph_start, prev_end);
289                if let Some((pr, advance)) = sub_run {
290                    positioned_runs.push(PositionedRun {
291                        decorations: RunDecorations {
292                            underline_style: pr.underline_style,
293                            overline: pr.overline,
294                            strikeout: pr.strikeout,
295                            is_link: pr.is_link,
296                            foreground_color: pr.foreground_color,
297                            underline_color: pr.underline_color,
298                            background_color: pr.background_color,
299                            anchor_href: pr.anchor_href.clone(),
300                            tooltip: pr.tooltip.clone(),
301                            vertical_alignment: pr.vertical_alignment,
302                        },
303                        shaped_run: pr,
304                        x,
305                    });
306                    x += advance;
307                }
308            }
309            current_run_idx = Some(fg.run_index);
310            run_glyph_start = fg.glyph_index_in_run;
311        }
312    }
313
314    // Emit final run segment
315    if let Some(run_idx) = current_run_idx {
316        let end_in_run = if end < flat.len() && flat[end].run_index == run_idx {
317            flat[end].glyph_index_in_run
318        } else if end > start {
319            flat[end - 1].glyph_index_in_run + 1
320        } else {
321            run_glyph_start
322        };
323        let sub_run = extract_sub_run(runs, run_idx, run_glyph_start, end_in_run);
324        if let Some((pr, advance)) = sub_run {
325            positioned_runs.push(PositionedRun {
326                decorations: RunDecorations {
327                    underline_style: pr.underline_style,
328                    overline: pr.overline,
329                    strikeout: pr.strikeout,
330                    is_link: pr.is_link,
331                    foreground_color: pr.foreground_color,
332                    underline_color: pr.underline_color,
333                    background_color: pr.background_color,
334                    anchor_href: pr.anchor_href.clone(),
335                    tooltip: pr.tooltip.clone(),
336                    vertical_alignment: pr.vertical_alignment,
337                },
338                shaped_run: pr,
339                x,
340            });
341            x += advance;
342        }
343    }
344
345    let width = x - indent;
346
347    // Compute char range from cluster values.
348    // Clusters from rustybuzz are byte offsets — convert to char offsets
349    // so that positions match text-document's character-based coordinates.
350    let byte_start = if start < flat.len() {
351        flat[start].cluster as usize
352    } else {
353        0
354    };
355    let byte_end = if end > 0 && end <= flat.len() {
356        if end < flat.len() {
357            flat[end].cluster as usize
358        } else {
359            let byte_offset = flat[end - 1].cluster as usize;
360            let char_len = text
361                .get(byte_offset..)
362                .and_then(|s| s.chars().next())
363                .map(|c| c.len_utf8())
364                .unwrap_or(1);
365            byte_offset + char_len
366        }
367    } else {
368        byte_start
369    };
370    let char_start = byte_offset_to_char_offset(text, byte_start);
371    let char_end = byte_offset_to_char_offset(text, byte_end);
372
373    // Expand line height for inline images taller than the font ascent
374    let mut ascent = metrics.ascent;
375    for run in &positioned_runs {
376        if run.shaped_run.image_name.is_some() && run.shaped_run.image_height > ascent {
377            ascent = run.shaped_run.image_height;
378        }
379    }
380    let line_height = ascent + metrics.descent + metrics.leading;
381
382    LayoutLine {
383        runs: positioned_runs,
384        y: 0.0, // will be set by the caller (block layout)
385        ascent,
386        descent: metrics.descent,
387        leading: metrics.leading,
388        width,
389        char_range: char_start..char_end,
390        line_height,
391    }
392}
393
394/// Extract a sub-run (slice of glyphs) from a ShapedRun.
395/// Cluster values are offset to block-text space (adding text_range.start).
396fn extract_sub_run(
397    runs: &[ShapedRun],
398    run_index: usize,
399    glyph_start: usize,
400    glyph_end: usize,
401) -> Option<(ShapedRun, f32)> {
402    let run = &runs[run_index];
403    let end = glyph_end.min(run.glyphs.len());
404    if glyph_start >= end {
405        return None;
406    }
407    let cluster_offset = run.text_range.start as u32;
408    let mut sub_glyphs = run.glyphs[glyph_start..end].to_vec();
409    // Offset cluster values from fragment-local to block-text space
410    for g in &mut sub_glyphs {
411        g.cluster += cluster_offset;
412    }
413    let advance: f32 = sub_glyphs.iter().map(|g| g.x_advance).sum();
414
415    let sub_run = ShapedRun {
416        font_face_id: run.font_face_id,
417        size_px: run.size_px,
418        glyphs: sub_glyphs,
419        advance_width: advance,
420        text_range: run.text_range.clone(),
421        underline_style: run.underline_style,
422        overline: run.overline,
423        strikeout: run.strikeout,
424        is_link: run.is_link,
425        foreground_color: run.foreground_color,
426        underline_color: run.underline_color,
427        background_color: run.background_color,
428        anchor_href: run.anchor_href.clone(),
429        tooltip: run.tooltip.clone(),
430        vertical_alignment: run.vertical_alignment,
431        image_name: run.image_name.clone(),
432        image_height: run.image_height,
433    };
434    Some((sub_run, advance))
435}
436
437fn make_empty_line(metrics: &FontMetricsPx, char_range: Range<usize>) -> LayoutLine {
438    LayoutLine {
439        runs: Vec::new(),
440        y: 0.0,
441        ascent: metrics.ascent,
442        descent: metrics.descent,
443        leading: metrics.leading,
444        width: 0.0,
445        char_range,
446        line_height: metrics.ascent + metrics.descent + metrics.leading,
447    }
448}
449
450/// Distribute extra space among word gaps for justification.
451///
452/// Finds space glyphs (cluster mapping to ' ') across all runs and
453/// increases their x_advance proportionally. Then recomputes run x positions.
454fn justify_line(line: &mut LayoutLine, target_width: f32, text: &str) {
455    let extra = target_width - line.width;
456    if extra <= 0.0 {
457        return;
458    }
459
460    // Count space glyphs across all runs
461    let mut space_count = 0usize;
462    for run in &line.runs {
463        for glyph in &run.shaped_run.glyphs {
464            let byte_offset = glyph.cluster as usize;
465            if let Some(ch) = text.get(byte_offset..).and_then(|s| s.chars().next())
466                && ch == ' '
467            {
468                space_count += 1;
469            }
470        }
471    }
472
473    if space_count == 0 {
474        return;
475    }
476
477    let extra_per_space = extra / space_count as f32;
478
479    // Increase x_advance of space glyphs
480    for run in &mut line.runs {
481        for glyph in &mut run.shaped_run.glyphs {
482            let byte_offset = glyph.cluster as usize;
483            if let Some(ch) = text.get(byte_offset..).and_then(|s| s.chars().next())
484                && ch == ' '
485            {
486                glyph.x_advance += extra_per_space;
487            }
488        }
489        // Recompute run advance width
490        run.shaped_run.advance_width = run.shaped_run.glyphs.iter().map(|g| g.x_advance).sum();
491    }
492
493    // Recompute run x positions (runs follow each other)
494    let first_x = line.runs.first().map(|r| r.x).unwrap_or(0.0);
495    let mut x = first_x;
496    for run in &mut line.runs {
497        run.x = x;
498        x += run.shaped_run.advance_width;
499    }
500
501    line.width = target_width;
502}