Skip to main content

text_typeset/layout/
paragraph.rs

1use std::collections::HashSet;
2use std::ops::Range;
3
4use unicode_linebreak::{BreakOpportunity, linebreaks};
5
6use crate::layout::line::{LayoutLine, PositionedRun, RunDecorations};
7use crate::shaping::run::ShapedRun;
8use crate::shaping::shaper::FontMetricsPx;
9
/// Convert a byte offset within a UTF-8 string to a char offset.
///
/// Returns the number of whole characters that end at or before
/// `byte_offset`. Offsets past the end of `text` are clamped to its length,
/// and an offset that lands inside a multi-byte character is rounded down to
/// the start of that character, so this function never panics.
fn byte_offset_to_char_offset(text: &str, byte_offset: usize) -> usize {
    let mut end = byte_offset.min(text.len());
    // Slicing at a non-boundary panics; step back to the previous boundary.
    // Offset 0 is always a boundary, so the loop terminates.
    while !text.is_char_boundary(end) {
        end -= 1;
    }
    text[..end].chars().count()
}
14
/// Horizontal placement of a line's runs within the available width.
///
/// `Left` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Alignment {
    /// Runs stay where line building put them (starting at the indent).
    #[default]
    Left,
    /// Runs are shifted so the line ends at the right edge.
    Right,
    /// Runs are shifted by half of the unused width.
    Center,
    /// Space glyphs are widened so the line fills the width; the last line
    /// of a paragraph is left as is.
    Justify,
}
24
25/// Break shaped runs into lines that fit within `available_width`.
26///
27/// Strategy: shape-first-then-break.
28/// 1. The caller has already shaped the full paragraph into one or more ShapedRuns.
29/// 2. We use unicode-linebreak to find break opportunities in the original text.
30/// 3. We map break positions to glyph boundaries via cluster values.
31/// 4. Greedy line wrapping: accumulate glyph advances, break at the last
32///    allowed opportunity before exceeding the width.
33/// 5. Apply alignment per line.
34pub fn break_into_lines(
35    runs: Vec<ShapedRun>,
36    text: &str,
37    available_width: f32,
38    alignment: Alignment,
39    first_line_indent: f32,
40    metrics: &FontMetricsPx,
41) -> Vec<LayoutLine> {
42    if runs.is_empty() || text.is_empty() {
43        // Empty paragraph: produce one empty line for the block to have height
44        return vec![make_empty_line(metrics, 0..0)];
45    }
46
47    // Flatten all glyphs into a single sequence with their run association
48    let flat = flatten_runs(&runs);
49    if flat.is_empty() {
50        return vec![make_empty_line(metrics, 0..0)];
51    }
52
53    // Get break opportunities from unicode-linebreak (byte offsets in text)
54    let breaks: Vec<(usize, BreakOpportunity)> = linebreaks(text).collect();
55
56    // Build sets of allowed and mandatory break positions (glyph indices)
57    let (break_points, mandatory_breaks) = map_breaks_to_glyph_indices(&flat, &breaks);
58
59    // Greedy line wrapping
60    let mut lines = Vec::new();
61    let mut line_start_glyph = 0usize;
62    let mut line_width = 0.0f32;
63    let mut last_break_glyph: Option<usize> = None;
64    // First line may be indented; subsequent lines use full width
65    let mut effective_width = available_width - first_line_indent;
66
67    for i in 0..flat.len() {
68        let glyph_advance = flat[i].x_advance;
69        line_width += glyph_advance;
70
71        // Check if this glyph index is a break point
72        if break_points.contains(&(i + 1)) {
73            last_break_glyph = Some(i + 1);
74        }
75
76        // Check for mandatory break — O(1) HashSet lookup
77        let is_mandatory = mandatory_breaks.contains(&(i + 1));
78
79        let exceeds_width = line_width > effective_width && line_start_glyph < i;
80
81        if is_mandatory || exceeds_width {
82            let break_at = if is_mandatory {
83                i + 1
84            } else if let Some(bp) = last_break_glyph {
85                if bp > line_start_glyph {
86                    bp
87                } else {
88                    i + 1 // emergency break -no opportunity found
89                }
90            } else {
91                i + 1 // emergency break -no break opportunities at all
92            };
93
94            let indent = if lines.is_empty() {
95                first_line_indent
96            } else {
97                0.0
98            };
99            let line = build_line(
100                &runs,
101                &flat,
102                line_start_glyph,
103                break_at,
104                metrics,
105                indent,
106                text,
107            );
108            lines.push(line);
109
110            line_start_glyph = break_at;
111            // Subsequent lines use full available width
112            effective_width = available_width;
113            // Re-accumulate width for glyphs already scanned past the break
114            line_width = 0.0;
115            for j in break_at..=i {
116                if j < flat.len() {
117                    line_width += flat[j].x_advance;
118                }
119            }
120            last_break_glyph = None;
121        }
122    }
123
124    // Remaining glyphs form the last line
125    if line_start_glyph < flat.len() {
126        let line = build_line(
127            &runs,
128            &flat,
129            line_start_glyph,
130            flat.len(),
131            metrics,
132            if lines.is_empty() {
133                first_line_indent
134            } else {
135                0.0
136            },
137            text,
138        );
139        lines.push(line);
140    }
141
142    // Apply alignment
143    let effective_width = available_width;
144    let last_idx = lines.len().saturating_sub(1);
145    for (i, line) in lines.iter_mut().enumerate() {
146        let indent = if i == 0 { first_line_indent } else { 0.0 };
147        let line_avail = effective_width - indent;
148        match alignment {
149            Alignment::Left => {} // runs already at x=0 (plus indent)
150            Alignment::Right => {
151                let shift = (line_avail - line.width).max(0.0);
152                for run in &mut line.runs {
153                    run.x += shift;
154                }
155            }
156            Alignment::Center => {
157                let shift = ((line_avail - line.width) / 2.0).max(0.0);
158                for run in &mut line.runs {
159                    run.x += shift;
160                }
161            }
162            Alignment::Justify => {
163                // Don't justify the last line
164                if i < last_idx && line.width > 0.0 {
165                    justify_line(line, line_avail, text);
166                }
167            }
168        }
169    }
170
171    if lines.is_empty() {
172        lines.push(make_empty_line(metrics, 0..0));
173    }
174
175    lines
176}
177
/// One glyph of the paragraph-wide glyph sequence, carrying what is needed
/// to find it again inside its originating run.
struct FlatGlyph {
    /// Horizontal advance of the glyph.
    x_advance: f32,
    /// Cluster value of the glyph, already shifted into block-text space.
    cluster: u32,
    /// Index of the originating run in the `runs` slice.
    run_index: usize,
    /// Index of the glyph inside that run's glyph list.
    glyph_index_in_run: usize,
}
185
186fn flatten_runs(runs: &[ShapedRun]) -> Vec<FlatGlyph> {
187    let mut flat = Vec::new();
188    for (run_idx, run) in runs.iter().enumerate() {
189        // Offset cluster values from fragment-text space to block-text space.
190        // rustybuzz assigns clusters as byte offsets within the fragment text (0-based),
191        // but unicode-linebreak returns byte offsets in the full block text.
192        let cluster_offset = run.text_range.start as u32;
193        for (glyph_idx, glyph) in run.glyphs.iter().enumerate() {
194            flat.push(FlatGlyph {
195                x_advance: glyph.x_advance,
196                cluster: glyph.cluster + cluster_offset,
197                run_index: run_idx,
198                glyph_index_in_run: glyph_idx,
199            });
200        }
201    }
202    flat
203}
204
205/// Map unicode-linebreak byte offsets to glyph indices using a merged walk.
206/// Both `flat` (by cluster) and `breaks` (by byte offset) are sorted,
207/// so a single O(b + m) pass replaces the previous O(b × m) approach.
208///
209/// Returns (break_points: HashSet<glyph_idx>, mandatory_breaks: HashSet<glyph_idx>).
210fn map_breaks_to_glyph_indices(
211    flat: &[FlatGlyph],
212    breaks: &[(usize, BreakOpportunity)],
213) -> (HashSet<usize>, HashSet<usize>) {
214    let mut break_points = HashSet::new();
215    let mut mandatory_breaks = HashSet::new();
216    let mut glyph_cursor = 0usize;
217
218    for &(byte_offset, opportunity) in breaks {
219        // Advance glyph cursor to the first glyph whose cluster >= byte_offset
220        while glyph_cursor < flat.len() && (flat[glyph_cursor].cluster as usize) < byte_offset {
221            glyph_cursor += 1;
222        }
223        let glyph_idx = if glyph_cursor < flat.len() {
224            glyph_cursor
225        } else {
226            flat.len()
227        };
228        break_points.insert(glyph_idx);
229        if opportunity == BreakOpportunity::Mandatory {
230            mandatory_breaks.insert(glyph_idx);
231        }
232    }
233
234    (break_points, mandatory_breaks)
235}
236
237/// Build a LayoutLine from a glyph range within the flat sequence.
238fn build_line(
239    runs: &[ShapedRun],
240    flat: &[FlatGlyph],
241    start: usize,
242    end: usize,
243    metrics: &FontMetricsPx,
244    indent: f32,
245    text: &str,
246) -> LayoutLine {
247    // Group consecutive glyphs by run_index to reconstruct PositionedRuns
248    let mut positioned_runs = Vec::new();
249    let mut x = indent;
250    let mut current_run_idx: Option<usize> = None;
251    let mut run_glyph_start = 0usize;
252
253    for i in start..end {
254        let fg = &flat[i];
255        if current_run_idx != Some(fg.run_index) {
256            // Emit previous run segment if any
257            if let Some(prev_run_idx) = current_run_idx {
258                // End of previous run: use the last glyph we saw from that run
259                let prev_end = if i > start {
260                    flat[i - 1].glyph_index_in_run + 1
261                } else {
262                    run_glyph_start
263                };
264                let sub_run = extract_sub_run(runs, prev_run_idx, run_glyph_start, prev_end);
265                if let Some((pr, advance)) = sub_run {
266                    positioned_runs.push(PositionedRun {
267                        decorations: RunDecorations {
268                            underline: pr.underline,
269                            overline: pr.overline,
270                            strikeout: pr.strikeout,
271                            is_link: pr.is_link,
272                        },
273                        shaped_run: pr,
274                        x,
275                    });
276                    x += advance;
277                }
278            }
279            current_run_idx = Some(fg.run_index);
280            run_glyph_start = fg.glyph_index_in_run;
281        }
282    }
283
284    // Emit final run segment
285    if let Some(run_idx) = current_run_idx {
286        let end_in_run = if end < flat.len() && flat[end].run_index == run_idx {
287            flat[end].glyph_index_in_run
288        } else if end > start {
289            flat[end - 1].glyph_index_in_run + 1
290        } else {
291            run_glyph_start
292        };
293        let sub_run = extract_sub_run(runs, run_idx, run_glyph_start, end_in_run);
294        if let Some((pr, advance)) = sub_run {
295            positioned_runs.push(PositionedRun {
296                decorations: RunDecorations {
297                    underline: pr.underline,
298                    overline: pr.overline,
299                    strikeout: pr.strikeout,
300                    is_link: pr.is_link,
301                },
302                shaped_run: pr,
303                x,
304            });
305            x += advance;
306        }
307    }
308
309    let width = x - indent;
310
311    // Compute char range from cluster values.
312    // Clusters from rustybuzz are byte offsets — convert to char offsets
313    // so that positions match text-document's character-based coordinates.
314    let byte_start = if start < flat.len() {
315        flat[start].cluster as usize
316    } else {
317        0
318    };
319    let byte_end = if end > 0 && end <= flat.len() {
320        if end < flat.len() {
321            flat[end].cluster as usize
322        } else {
323            let byte_offset = flat[end - 1].cluster as usize;
324            let char_len = text
325                .get(byte_offset..)
326                .and_then(|s| s.chars().next())
327                .map(|c| c.len_utf8())
328                .unwrap_or(1);
329            byte_offset + char_len
330        }
331    } else {
332        byte_start
333    };
334    let char_start = byte_offset_to_char_offset(text, byte_start);
335    let char_end = byte_offset_to_char_offset(text, byte_end);
336
337    // Convert glyph cluster values from byte offsets to char offsets
338    for run in &mut positioned_runs {
339        for glyph in &mut run.shaped_run.glyphs {
340            glyph.cluster = byte_offset_to_char_offset(text, glyph.cluster as usize) as u32;
341        }
342    }
343
344    let line_height = metrics.ascent + metrics.descent + metrics.leading;
345
346    LayoutLine {
347        runs: positioned_runs,
348        y: 0.0, // will be set by the caller (block layout)
349        ascent: metrics.ascent,
350        descent: metrics.descent,
351        leading: metrics.leading,
352        width,
353        char_range: char_start..char_end,
354        line_height,
355    }
356}
357
358/// Extract a sub-run (slice of glyphs) from a ShapedRun.
359/// Cluster values are offset to block-text space (adding text_range.start).
360fn extract_sub_run(
361    runs: &[ShapedRun],
362    run_index: usize,
363    glyph_start: usize,
364    glyph_end: usize,
365) -> Option<(ShapedRun, f32)> {
366    let run = &runs[run_index];
367    let end = glyph_end.min(run.glyphs.len());
368    if glyph_start >= end {
369        return None;
370    }
371    let cluster_offset = run.text_range.start as u32;
372    let mut sub_glyphs = run.glyphs[glyph_start..end].to_vec();
373    // Offset cluster values from fragment-local to block-text space
374    for g in &mut sub_glyphs {
375        g.cluster += cluster_offset;
376    }
377    let advance: f32 = sub_glyphs.iter().map(|g| g.x_advance).sum();
378
379    let sub_run = ShapedRun {
380        font_face_id: run.font_face_id,
381        size_px: run.size_px,
382        glyphs: sub_glyphs,
383        advance_width: advance,
384        text_range: run.text_range.clone(),
385        underline: run.underline,
386        overline: run.overline,
387        strikeout: run.strikeout,
388        is_link: run.is_link,
389    };
390    Some((sub_run, advance))
391}
392
393fn make_empty_line(metrics: &FontMetricsPx, char_range: Range<usize>) -> LayoutLine {
394    LayoutLine {
395        runs: Vec::new(),
396        y: 0.0,
397        ascent: metrics.ascent,
398        descent: metrics.descent,
399        leading: metrics.leading,
400        width: 0.0,
401        char_range,
402        line_height: metrics.ascent + metrics.descent + metrics.leading,
403    }
404}
405
406/// Distribute extra space among word gaps for justification.
407///
408/// Finds space glyphs (cluster mapping to ' ') across all runs and
409/// increases their x_advance proportionally. Then recomputes run x positions.
410fn justify_line(line: &mut LayoutLine, target_width: f32, text: &str) {
411    let extra = target_width - line.width;
412    if extra <= 0.0 {
413        return;
414    }
415
416    // Count space glyphs across all runs
417    let mut space_count = 0usize;
418    for run in &line.runs {
419        for glyph in &run.shaped_run.glyphs {
420            let byte_offset = glyph.cluster as usize;
421            if let Some(ch) = text.get(byte_offset..).and_then(|s| s.chars().next())
422                && ch == ' '
423            {
424                space_count += 1;
425            }
426        }
427    }
428
429    if space_count == 0 {
430        return;
431    }
432
433    let extra_per_space = extra / space_count as f32;
434
435    // Increase x_advance of space glyphs
436    for run in &mut line.runs {
437        for glyph in &mut run.shaped_run.glyphs {
438            let byte_offset = glyph.cluster as usize;
439            if let Some(ch) = text.get(byte_offset..).and_then(|s| s.chars().next())
440                && ch == ' '
441            {
442                glyph.x_advance += extra_per_space;
443            }
444        }
445        // Recompute run advance width
446        run.shaped_run.advance_width = run.shaped_run.glyphs.iter().map(|g| g.x_advance).sum();
447    }
448
449    // Recompute run x positions (runs follow each other)
450    let first_x = line.runs.first().map(|r| r.x).unwrap_or(0.0);
451    let mut x = first_x;
452    for run in &mut line.runs {
453        run.x = x;
454        x += run.shaped_run.advance_width;
455    }
456
457    line.width = target_width;
458}