Skip to main content

text_typeset/layout/
paragraph.rs

1use std::collections::HashSet;
2use std::ops::Range;
3
4use unicode_linebreak::{BreakOpportunity, linebreaks};
5
6use crate::layout::line::{LayoutLine, PositionedRun, RunDecorations};
7use crate::shaping::run::ShapedRun;
8use crate::shaping::shaper::FontMetricsPx;
9
10/// Convert a byte offset within a UTF-8 string to a char offset.
11fn byte_offset_to_char_offset(text: &str, byte_offset: usize) -> usize {
12    text[..byte_offset.min(text.len())].chars().count()
13}
14
15/// Text alignment within a line.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
17pub enum Alignment {
18    #[default]
19    Left,
20    Right,
21    Center,
22    Justify,
23}
24
25/// Break shaped runs into lines that fit within `available_width`.
26///
27/// Strategy: shape-first-then-break.
28/// 1. The caller has already shaped the full paragraph into one or more ShapedRuns.
29/// 2. We use unicode-linebreak to find break opportunities in the original text.
30/// 3. We map break positions to glyph boundaries via cluster values.
31/// 4. Greedy line wrapping: accumulate glyph advances, break at the last
32///    allowed opportunity before exceeding the width.
33/// 5. Apply alignment per line.
34pub fn break_into_lines(
35    runs: Vec<ShapedRun>,
36    text: &str,
37    available_width: f32,
38    alignment: Alignment,
39    first_line_indent: f32,
40    metrics: &FontMetricsPx,
41) -> Vec<LayoutLine> {
42    if runs.is_empty() || text.is_empty() {
43        // Empty paragraph: produce one empty line for the block to have height
44        return vec![make_empty_line(metrics, 0..0)];
45    }
46
47    // Flatten all glyphs into a single sequence with their run association
48    let flat = flatten_runs(&runs);
49    if flat.is_empty() {
50        return vec![make_empty_line(metrics, 0..0)];
51    }
52
53    // Get break opportunities from unicode-linebreak (byte offsets in text)
54    let breaks: Vec<(usize, BreakOpportunity)> = linebreaks(text).collect();
55
56    // Build sets of allowed and mandatory break positions (glyph indices)
57    let (break_points, mandatory_breaks) = map_breaks_to_glyph_indices(&flat, &breaks);
58
59    // Greedy line wrapping
60    let mut lines = Vec::new();
61    let mut line_start_glyph = 0usize;
62    let mut line_width = 0.0f32;
63    let mut last_break_glyph: Option<usize> = None;
64    // First line may be indented; subsequent lines use full width
65    let mut effective_width = available_width - first_line_indent;
66
67    for i in 0..flat.len() {
68        let glyph_advance = flat[i].x_advance;
69        line_width += glyph_advance;
70
71        // Check for mandatory break — O(1) HashSet lookup
72        let is_mandatory = mandatory_breaks.contains(&(i + 1));
73
74        let exceeds_width = line_width > effective_width && line_start_glyph < i;
75
76        if is_mandatory || exceeds_width {
77            let break_at = if is_mandatory {
78                i + 1
79            } else if let Some(bp) = last_break_glyph {
80                if bp > line_start_glyph {
81                    bp
82                } else {
83                    i + 1 // emergency break -no opportunity found
84                }
85            } else {
86                i + 1 // emergency break -no break opportunities at all
87            };
88
89            let indent = if lines.is_empty() {
90                first_line_indent
91            } else {
92                0.0
93            };
94            let line = build_line(
95                &runs,
96                &flat,
97                line_start_glyph,
98                break_at,
99                metrics,
100                indent,
101                text,
102            );
103            lines.push(line);
104
105            line_start_glyph = break_at;
106            // Subsequent lines use full available width
107            effective_width = available_width;
108            // Re-accumulate width for glyphs already scanned past the break
109            line_width = 0.0;
110            for j in break_at..=i {
111                if j < flat.len() {
112                    line_width += flat[j].x_advance;
113                }
114            }
115            last_break_glyph = None;
116        }
117
118        // Update break point AFTER the width check so that a break opportunity
119        // discovered at this glyph does not clobber the previous one when the
120        // width is already exceeded. This prevents the end-of-text mandatory
121        // break from keeping the last glyph on an overflowing line.
122        if break_points.contains(&(i + 1)) {
123            last_break_glyph = Some(i + 1);
124        }
125    }
126
127    // Remaining glyphs form the last line
128    if line_start_glyph < flat.len() {
129        let line = build_line(
130            &runs,
131            &flat,
132            line_start_glyph,
133            flat.len(),
134            metrics,
135            if lines.is_empty() {
136                first_line_indent
137            } else {
138                0.0
139            },
140            text,
141        );
142        lines.push(line);
143    }
144
145    // Apply alignment
146    let effective_width = available_width;
147    let last_idx = lines.len().saturating_sub(1);
148    for (i, line) in lines.iter_mut().enumerate() {
149        let indent = if i == 0 { first_line_indent } else { 0.0 };
150        let line_avail = effective_width - indent;
151        match alignment {
152            Alignment::Left => {} // runs already at x=0 (plus indent)
153            Alignment::Right => {
154                let shift = (line_avail - line.width).max(0.0);
155                for run in &mut line.runs {
156                    run.x += shift;
157                }
158            }
159            Alignment::Center => {
160                let shift = ((line_avail - line.width) / 2.0).max(0.0);
161                for run in &mut line.runs {
162                    run.x += shift;
163                }
164            }
165            Alignment::Justify => {
166                // Don't justify the last line
167                if i < last_idx && line.width > 0.0 {
168                    justify_line(line, line_avail, text);
169                }
170            }
171        }
172    }
173
174    if lines.is_empty() {
175        lines.push(make_empty_line(metrics, 0..0));
176    }
177
178    // Convert glyph cluster values from byte offsets to char offsets.
179    // This must happen AFTER alignment because justify_line needs byte
180    // offsets to find space characters in the original text.
181    for line in &mut lines {
182        for run in &mut line.runs {
183            for glyph in &mut run.shaped_run.glyphs {
184                glyph.cluster = byte_offset_to_char_offset(text, glyph.cluster as usize) as u32;
185            }
186        }
187    }
188
189    lines
190}
191
192/// A flattened glyph with enough info to map back to runs.
193struct FlatGlyph {
194    x_advance: f32,
195    cluster: u32,
196    run_index: usize,
197    glyph_index_in_run: usize,
198}
199
200fn flatten_runs(runs: &[ShapedRun]) -> Vec<FlatGlyph> {
201    let mut flat = Vec::new();
202    for (run_idx, run) in runs.iter().enumerate() {
203        // Offset cluster values from fragment-text space to block-text space.
204        // rustybuzz assigns clusters as byte offsets within the fragment text (0-based),
205        // but unicode-linebreak returns byte offsets in the full block text.
206        let cluster_offset = run.text_range.start as u32;
207        for (glyph_idx, glyph) in run.glyphs.iter().enumerate() {
208            flat.push(FlatGlyph {
209                x_advance: glyph.x_advance,
210                cluster: glyph.cluster + cluster_offset,
211                run_index: run_idx,
212                glyph_index_in_run: glyph_idx,
213            });
214        }
215    }
216    flat
217}
218
219/// Map unicode-linebreak byte offsets to glyph indices using a merged walk.
220/// Both `flat` (by cluster) and `breaks` (by byte offset) are sorted,
221/// so a single O(b + m) pass replaces the previous O(b × m) approach.
222///
223/// Returns (break_points: HashSet<glyph_idx>, mandatory_breaks: HashSet<glyph_idx>).
224fn map_breaks_to_glyph_indices(
225    flat: &[FlatGlyph],
226    breaks: &[(usize, BreakOpportunity)],
227) -> (HashSet<usize>, HashSet<usize>) {
228    let mut break_points = HashSet::new();
229    let mut mandatory_breaks = HashSet::new();
230    let mut glyph_cursor = 0usize;
231
232    for &(byte_offset, opportunity) in breaks {
233        // Advance glyph cursor to the first glyph whose cluster >= byte_offset
234        while glyph_cursor < flat.len() && (flat[glyph_cursor].cluster as usize) < byte_offset {
235            glyph_cursor += 1;
236        }
237        let glyph_idx = if glyph_cursor < flat.len() {
238            glyph_cursor
239        } else {
240            flat.len()
241        };
242        break_points.insert(glyph_idx);
243        if opportunity == BreakOpportunity::Mandatory {
244            mandatory_breaks.insert(glyph_idx);
245        }
246    }
247
248    (break_points, mandatory_breaks)
249}
250
251/// Build a LayoutLine from a glyph range within the flat sequence.
252fn build_line(
253    runs: &[ShapedRun],
254    flat: &[FlatGlyph],
255    start: usize,
256    end: usize,
257    metrics: &FontMetricsPx,
258    indent: f32,
259    text: &str,
260) -> LayoutLine {
261    // Group consecutive glyphs by run_index to reconstruct PositionedRuns
262    let mut positioned_runs = Vec::new();
263    let mut x = indent;
264    let mut current_run_idx: Option<usize> = None;
265    let mut run_glyph_start = 0usize;
266
267    for i in start..end {
268        let fg = &flat[i];
269        if current_run_idx != Some(fg.run_index) {
270            // Emit previous run segment if any
271            if let Some(prev_run_idx) = current_run_idx {
272                // End of previous run: use the last glyph we saw from that run
273                let prev_end = if i > start {
274                    flat[i - 1].glyph_index_in_run + 1
275                } else {
276                    run_glyph_start
277                };
278                let sub_run = extract_sub_run(runs, prev_run_idx, run_glyph_start, prev_end);
279                if let Some((pr, advance)) = sub_run {
280                    positioned_runs.push(PositionedRun {
281                        decorations: RunDecorations {
282                            underline_style: pr.underline_style,
283                            overline: pr.overline,
284                            strikeout: pr.strikeout,
285                            is_link: pr.is_link,
286                            foreground_color: pr.foreground_color,
287                            underline_color: pr.underline_color,
288                            background_color: pr.background_color,
289                            anchor_href: pr.anchor_href.clone(),
290                            tooltip: pr.tooltip.clone(),
291                            vertical_alignment: pr.vertical_alignment,
292                        },
293                        shaped_run: pr,
294                        x,
295                    });
296                    x += advance;
297                }
298            }
299            current_run_idx = Some(fg.run_index);
300            run_glyph_start = fg.glyph_index_in_run;
301        }
302    }
303
304    // Emit final run segment
305    if let Some(run_idx) = current_run_idx {
306        let end_in_run = if end < flat.len() && flat[end].run_index == run_idx {
307            flat[end].glyph_index_in_run
308        } else if end > start {
309            flat[end - 1].glyph_index_in_run + 1
310        } else {
311            run_glyph_start
312        };
313        let sub_run = extract_sub_run(runs, run_idx, run_glyph_start, end_in_run);
314        if let Some((pr, advance)) = sub_run {
315            positioned_runs.push(PositionedRun {
316                decorations: RunDecorations {
317                    underline_style: pr.underline_style,
318                    overline: pr.overline,
319                    strikeout: pr.strikeout,
320                    is_link: pr.is_link,
321                    foreground_color: pr.foreground_color,
322                    underline_color: pr.underline_color,
323                    background_color: pr.background_color,
324                    anchor_href: pr.anchor_href.clone(),
325                    tooltip: pr.tooltip.clone(),
326                    vertical_alignment: pr.vertical_alignment,
327                },
328                shaped_run: pr,
329                x,
330            });
331            x += advance;
332        }
333    }
334
335    let width = x - indent;
336
337    // Compute char range from cluster values.
338    // Clusters from rustybuzz are byte offsets — convert to char offsets
339    // so that positions match text-document's character-based coordinates.
340    let byte_start = if start < flat.len() {
341        flat[start].cluster as usize
342    } else {
343        0
344    };
345    let byte_end = if end > 0 && end <= flat.len() {
346        if end < flat.len() {
347            flat[end].cluster as usize
348        } else {
349            let byte_offset = flat[end - 1].cluster as usize;
350            let char_len = text
351                .get(byte_offset..)
352                .and_then(|s| s.chars().next())
353                .map(|c| c.len_utf8())
354                .unwrap_or(1);
355            byte_offset + char_len
356        }
357    } else {
358        byte_start
359    };
360    let char_start = byte_offset_to_char_offset(text, byte_start);
361    let char_end = byte_offset_to_char_offset(text, byte_end);
362
363    // Expand line height for inline images taller than the font ascent
364    let mut ascent = metrics.ascent;
365    for run in &positioned_runs {
366        if run.shaped_run.image_name.is_some() && run.shaped_run.image_height > ascent {
367            ascent = run.shaped_run.image_height;
368        }
369    }
370    let line_height = ascent + metrics.descent + metrics.leading;
371
372    LayoutLine {
373        runs: positioned_runs,
374        y: 0.0, // will be set by the caller (block layout)
375        ascent,
376        descent: metrics.descent,
377        leading: metrics.leading,
378        width,
379        char_range: char_start..char_end,
380        line_height,
381    }
382}
383
384/// Extract a sub-run (slice of glyphs) from a ShapedRun.
385/// Cluster values are offset to block-text space (adding text_range.start).
386fn extract_sub_run(
387    runs: &[ShapedRun],
388    run_index: usize,
389    glyph_start: usize,
390    glyph_end: usize,
391) -> Option<(ShapedRun, f32)> {
392    let run = &runs[run_index];
393    let end = glyph_end.min(run.glyphs.len());
394    if glyph_start >= end {
395        return None;
396    }
397    let cluster_offset = run.text_range.start as u32;
398    let mut sub_glyphs = run.glyphs[glyph_start..end].to_vec();
399    // Offset cluster values from fragment-local to block-text space
400    for g in &mut sub_glyphs {
401        g.cluster += cluster_offset;
402    }
403    let advance: f32 = sub_glyphs.iter().map(|g| g.x_advance).sum();
404
405    let sub_run = ShapedRun {
406        font_face_id: run.font_face_id,
407        size_px: run.size_px,
408        glyphs: sub_glyphs,
409        advance_width: advance,
410        text_range: run.text_range.clone(),
411        underline_style: run.underline_style,
412        overline: run.overline,
413        strikeout: run.strikeout,
414        is_link: run.is_link,
415        foreground_color: run.foreground_color,
416        underline_color: run.underline_color,
417        background_color: run.background_color,
418        anchor_href: run.anchor_href.clone(),
419        tooltip: run.tooltip.clone(),
420        vertical_alignment: run.vertical_alignment,
421        image_name: run.image_name.clone(),
422        image_height: run.image_height,
423    };
424    Some((sub_run, advance))
425}
426
427fn make_empty_line(metrics: &FontMetricsPx, char_range: Range<usize>) -> LayoutLine {
428    LayoutLine {
429        runs: Vec::new(),
430        y: 0.0,
431        ascent: metrics.ascent,
432        descent: metrics.descent,
433        leading: metrics.leading,
434        width: 0.0,
435        char_range,
436        line_height: metrics.ascent + metrics.descent + metrics.leading,
437    }
438}
439
440/// Distribute extra space among word gaps for justification.
441///
442/// Finds space glyphs (cluster mapping to ' ') across all runs and
443/// increases their x_advance proportionally. Then recomputes run x positions.
444fn justify_line(line: &mut LayoutLine, target_width: f32, text: &str) {
445    let extra = target_width - line.width;
446    if extra <= 0.0 {
447        return;
448    }
449
450    // Count space glyphs across all runs
451    let mut space_count = 0usize;
452    for run in &line.runs {
453        for glyph in &run.shaped_run.glyphs {
454            let byte_offset = glyph.cluster as usize;
455            if let Some(ch) = text.get(byte_offset..).and_then(|s| s.chars().next())
456                && ch == ' '
457            {
458                space_count += 1;
459            }
460        }
461    }
462
463    if space_count == 0 {
464        return;
465    }
466
467    let extra_per_space = extra / space_count as f32;
468
469    // Increase x_advance of space glyphs
470    for run in &mut line.runs {
471        for glyph in &mut run.shaped_run.glyphs {
472            let byte_offset = glyph.cluster as usize;
473            if let Some(ch) = text.get(byte_offset..).and_then(|s| s.chars().next())
474                && ch == ' '
475            {
476                glyph.x_advance += extra_per_space;
477            }
478        }
479        // Recompute run advance width
480        run.shaped_run.advance_width = run.shaped_run.glyphs.iter().map(|g| g.x_advance).sum();
481    }
482
483    // Recompute run x positions (runs follow each other)
484    let first_x = line.runs.first().map(|r| r.x).unwrap_or(0.0);
485    let mut x = first_x;
486    for run in &mut line.runs {
487        run.x = x;
488        x += run.shaped_run.advance_width;
489    }
490
491    line.width = target_width;
492}