// normordis_pdf/layout/engine.rs

1use serde::{Deserialize, Serialize};
2
3use crate::{
4    fonts::FontRegistry,
5    layout::{
6        line::{LineBox, LineSegment},
7        TextAlign,
8    },
9    richtext::marks::{AppliedStyle, LineBreakingMode, TextRun},
10    styles::DocumentStyle,
11};
12#[cfg(feature = "optimal_wrap")]
13use crate::layout::knuth_plass::{KnuthPlassOptimizer, WordBox};
14
// ── Tab stops ─────────────────────────────────────────────────────────────────
16
/// Horizontal alignment of a tab stop.
///
/// Serialised through serde with `snake_case` variant names; `Left` is the
/// `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum TabStopAlign {
    /// Cursor advances to the stop position; text starts there.
    #[default]
    Left,
    /// Text ends at the stop position (look-ahead required).
    Right,
    /// Text is centred on the stop position (look-ahead required).
    Center,
    /// Decimal point aligns to the stop position.
    ///
    /// NOTE(review): `TextLayoutEngine::layout_runs` handles this variant in
    /// the same match arm as `Right`, i.e. the following text is right-aligned
    /// against the stop rather than aligned on its decimal separator.
    Decimal,
}
31
/// A single tab stop within a paragraph.
///
/// Build one with [`TabStop::left`], [`TabStop::right`], [`TabStop::center`]
/// or [`TabStop::decimal`], optionally followed by [`TabStop::with_leader`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TabStop {
    /// Distance from the left content margin in mm.
    pub position_mm: f64,
    /// How the text following the tab is positioned relative to `position_mm`.
    pub alignment: TabStopAlign,
    /// Fill character between the previous text and this stop. `' '` = none.
    pub leader: char,
}
41
42impl TabStop {
43    pub fn left(position_mm: f64) -> Self {
44        Self { position_mm, alignment: TabStopAlign::Left, leader: ' ' }
45    }
46
47    pub fn right(position_mm: f64) -> Self {
48        Self { position_mm, alignment: TabStopAlign::Right, leader: ' ' }
49    }
50
51    pub fn center(position_mm: f64) -> Self {
52        Self { position_mm, alignment: TabStopAlign::Center, leader: ' ' }
53    }
54
55    pub fn decimal(position_mm: f64) -> Self {
56        Self { position_mm, alignment: TabStopAlign::Decimal, leader: ' ' }
57    }
58
59    pub fn with_leader(mut self, c: char) -> Self {
60        self.leader = c;
61        self
62    }
63}
64
// ── Layout result ─────────────────────────────────────────────────────────────
66
/// Result of laying out a block of text into lines.
#[derive(Debug)]
pub struct LayoutResult {
    /// The laid-out lines, in top-to-bottom order.
    pub lines: Vec<LineBox>,
    /// Total block height in mm; the layout routines in this file compute it
    /// as the number of lines multiplied by the per-line height.
    pub total_height_mm: f64,
}
73
// ── TextLayoutEngine ──────────────────────────────────────────────────────────
75
/// Breaks `TextRun` sequences into `LineBox`es that fit within a given width.
///
/// Uses real glyph-advance metrics from `FontRegistry` (rustybuzz + ttf-parser).
pub struct TextLayoutEngine {
    // Font family name passed to `FontRegistry::measure_text_mm`; initialised
    // from the registry's default font and replaceable via `set_default_family`.
    default_family: String,
    // Copied from `DocumentStyle::line_height`; forwarded as the second
    // argument of the font's `line_height_mm` (presumably a multiplier of the
    // font size — confirm against the font type).
    line_height: f64,
}
83
84impl TextLayoutEngine {
85    pub fn new(fonts: &FontRegistry, style: &DocumentStyle) -> Self {
86        Self {
87            default_family: fonts.get_default().name.clone(),
88            line_height: style.line_height,
89        }
90    }
91
92    /// Measures the rendered width of `text` in mm using real glyph metrics.
93    pub fn measure_text_mm(
94        &self,
95        fonts: &FontRegistry,
96        text: &str,
97        font_size: f64,
98        bold: bool,
99        italic: bool,
100    ) -> f64 {
101        fonts.measure_text_mm(text, &self.default_family, font_size, bold, italic)
102    }
103
104    /// Returns the default font family name used for measurement.
105    pub fn default_family_name(&self) -> &str {
106        &self.default_family
107    }
108
109    /// Overrides the default font family used for measurement.
110    ///
111    /// Used by `Paragraph` to apply a per-paragraph font family.
112    /// Always restore with the previous value after the layout call.
113    pub fn set_default_family(&mut self, name: impl Into<String>) {
114        self.default_family = name.into();
115    }
116
117    /// Returns the line height in mm for `font_size` (pt).
118    pub fn line_height_mm(&self, fonts: &FontRegistry, font_size: f64) -> f64 {
119        fonts.get_default().line_height_mm(font_size, self.line_height)
120    }
121
122    /// Lays out `runs` into `LineBox`es fitting `max_width_mm`.
123    ///
124    /// `tab_stops` controls how `\t` characters inside runs are handled.
125    /// Pass an empty slice when no tab stops are defined.
126    pub fn layout_runs(
127        &self,
128        fonts: &FontRegistry,
129        runs: &[TextRun],
130        max_width_mm: f64,
131        alignment: TextAlign,
132        font_size: f64,
133        tab_stops: &[TabStop],
134    ) -> LayoutResult {
135        // ── Tokeniser ──────────────────────────────────────────────────────────
136        // Splits each run on '\n' (hard break), '\t' (tab), and whitespace
137        // (word boundary), preserving '\t' as a distinct Token::Tab.
138        enum Token {
139            Word(String, AppliedStyle, f64), // text, style, letter_spacing_mm
140            Tab(AppliedStyle),               // \t with style context
141            Break,                           // \n
142        }
143
144        let mut tokens: Vec<Token> = Vec::new();
145        for run in runs {
146            if run.text == "\n" {
147                tokens.push(Token::Break);
148                continue;
149            }
150            // Split the run text into segments, recognising \n and \t.
151            let mut buf = String::new();
152            for ch in run.text.chars() {
153                match ch {
154                    '\n' => {
155                        if !buf.is_empty() {
156                            let w = buf.trim().to_string();
157                            if !w.is_empty() {
158                                tokens.push(Token::Word(w, run.style.clone(), run.letter_spacing_mm));
159                            }
160                            buf.clear();
161                        }
162                        tokens.push(Token::Break);
163                    }
164                    '\t' => {
165                        if !buf.is_empty() {
166                            let w = buf.trim().to_string();
167                            if !w.is_empty() {
168                                tokens.push(Token::Word(w, run.style.clone(), run.letter_spacing_mm));
169                            }
170                            buf.clear();
171                        }
172                        tokens.push(Token::Tab(run.style.clone()));
173                    }
174                    ' ' => {
175                        // Flush accumulated word on space.
176                        if !buf.is_empty() {
177                            let w = buf.trim().to_string();
178                            if !w.is_empty() {
179                                tokens.push(Token::Word(w, run.style.clone(), run.letter_spacing_mm));
180                            }
181                            buf.clear();
182                        }
183                    }
184                    _ => buf.push(ch),
185                }
186            }
187            if !buf.is_empty() {
188                let w = buf.trim().to_string();
189                if !w.is_empty() {
190                    tokens.push(Token::Word(w, run.style.clone(), run.letter_spacing_mm));
191                }
192            }
193        }
194
195        let space_w = self.measure_text_mm(fonts, " ", font_size, false, false);
196        let line_h = self.line_height_mm(fonts, font_size);
197
198        let mut lines: Vec<LineBox> = Vec::new();
199        let mut pending: Vec<(String, AppliedStyle, f64)> = Vec::new();
200        let mut word_widths: Vec<f64> = Vec::new();
201        let mut x_cursor: f64 = 0.0;
202
203        let word_width = |fonts: &FontRegistry, word: &str, bold: bool, italic: bool, letter_spacing: f64| -> f64 {
204            let base = fonts.measure_text_mm(word, &self.default_family, font_size, bold, italic);
205            let chars = word.chars().count();
206            if chars > 1 && letter_spacing > 0.0 {
207                base + letter_spacing * (chars - 1) as f64
208            } else {
209                base
210            }
211        };
212
213        // Find the next tab stop at or after `x` (returns None if no stop applies).
214        let next_tab_stop = |x: f64| -> Option<&TabStop> {
215            tab_stops.iter().find(|ts| ts.position_mm > x)
216        };
217
218        // Measure the combined width of all Word tokens up to (not including) the
219        // next Tab or Break. Used for Right/Center tab look-ahead.
220        let lookahead_width = |from_idx: usize, tokens: &[Token]| -> f64 {
221            let mut w = 0.0;
222            let mut first = true;
223            for tok in &tokens[from_idx..] {
224                match tok {
225                    Token::Tab(_) | Token::Break => break,
226                    Token::Word(text, style, ls) => {
227                        if !first { w += space_w; }
228                        first = false;
229                        let base = fonts.measure_text_mm(text, &self.default_family, font_size, style.bold, style.italic);
230                        let chars = text.chars().count();
231                        w += if chars > 1 && *ls > 0.0 {
232                            base + ls * (chars - 1) as f64
233                        } else {
234                            base
235                        };
236                    }
237                }
238            }
239            w
240        };
241
242        let n_tokens = tokens.len();
243        let mut tok_idx = 0;
244
245        while tok_idx < n_tokens {
246            match &tokens[tok_idx] {
247                Token::Break => {
248                    if !pending.is_empty() {
249                        lines.push(build_line(
250                            &pending, &word_widths, max_width_mm, alignment,
251                            font_size, space_w, line_h, true,
252                        ));
253                        pending.clear();
254                        word_widths.clear();
255                        x_cursor = 0.0;
256                    } else {
257                        // Empty line — push an empty LineBox to preserve vertical space.
258                        lines.push(LineBox {
259                            segments: Vec::new(),
260                            height_mm: line_h,
261                            width_mm: 0.0,
262                            alignment,
263                        });
264                    }
265                    tok_idx += 1;
266                }
267
268                Token::Tab(style) => {
269                    // Flush current pending words into a line before processing the tab.
270                    if !pending.is_empty() {
271                        lines.push(build_line(
272                            &pending, &word_widths, max_width_mm, alignment,
273                            font_size, space_w, line_h, true,
274                        ));
275                        pending.clear();
276                        word_widths.clear();
277                        x_cursor = 0.0;
278                    }
279
280                    if let Some(stop) = next_tab_stop(x_cursor) {
281                        let stop_pos = stop.position_mm;
282                        let leader = stop.leader;
283                        let stop_align = stop.alignment;
284
285                        let leader_count = |gap: f64| -> usize {
286                            let char_w = word_width(fonts, &leader.to_string(), style.bold, style.italic, 0.0);
287                            if char_w > 0.0 { (gap / char_w).floor() as usize } else { 0 }
288                        };
289                        let push_leader = |pending: &mut Vec<_>, word_widths: &mut Vec<f64>, gap: f64| {
290                            if leader == ' ' || gap <= 0.0 { return; }
291                            let n = {
292                                let char_w = word_width(fonts, &leader.to_string(), style.bold, style.italic, 0.0);
293                                if char_w > 0.0 { (gap / char_w).floor() as usize } else { 0 }
294                            };
295                            if n == 0 { return; }
296                            let s: String = std::iter::repeat_n(leader, n).collect();
297                            let w = word_width(fonts, &s, style.bold, style.italic, 0.0);
298                            pending.push((s, style.clone(), 0.0));
299                            word_widths.push(w);
300                        };
301                        let _ = leader_count; // used via push_leader closure
302
303                        match stop_align {
304                            TabStopAlign::Left => {
305                                if stop_pos > x_cursor {
306                                    push_leader(&mut pending, &mut word_widths, stop_pos - x_cursor);
307                                }
308                                x_cursor = stop_pos;
309                            }
310                            TabStopAlign::Right | TabStopAlign::Decimal => {
311                                let ahead_w = lookahead_width(tok_idx + 1, &tokens);
312                                let text_start = (stop_pos - ahead_w).max(x_cursor);
313                                if text_start > x_cursor {
314                                    push_leader(&mut pending, &mut word_widths, text_start - x_cursor);
315                                }
316                                x_cursor = text_start;
317                            }
318                            TabStopAlign::Center => {
319                                let ahead_w = lookahead_width(tok_idx + 1, &tokens);
320                                let text_start = (stop_pos - ahead_w / 2.0).max(x_cursor);
321                                if text_start > x_cursor {
322                                    push_leader(&mut pending, &mut word_widths, text_start - x_cursor);
323                                }
324                                x_cursor = text_start;
325                            }
326                        }
327
328                        // Push a zero-width "spacer" segment to encode the new x position
329                        // into the pending word list so build_line places subsequent words
330                        // at the correct offset.
331                        if x_cursor > 0.0 {
332                            pending.push(("".to_string(), style.clone(), 0.0));
333                            word_widths.push(0.0);
334                            // Adjust x_cursor: the spacer re-anchors subsequent gap calculations.
335                            // We encode the desired x offset via the pre-accumulated word widths
336                            // in the pending vec. However, build_line sums widths sequentially.
337                            // Instead of a spacer, we embed a tab-jump directly by placing a
338                            // sentinel segment with x_offset_mm already set.
339                            // Simplest correct approach: flush everything up to this point as a
340                            // partial line with alignment=Left so offsets are absolute, then
341                            // continue building the rest as a continuation on the same visual line.
342                            //
343                            // For v1.3.0 we use a simpler strategy: inject a zero-width word
344                            // that carries x_cursor as its pre-accumulated width. This works
345                            // because build_line computes x positions by accumulating from 0.
346                            // We clear pending/word_widths and re-seed with x_cursor as a
347                            // synthetic "already spent" offset via a single invisible word.
348                            pending.clear();
349                            word_widths.clear();
350                            // Seed the tab gap as an invisible anchor word.
351                            // We use a thin-space approximation: track x_cursor externally and
352                            // let the first real word after the tab be emitted at the correct x.
353                            // The tab itself becomes an explicit x_offset in the LineBox segment.
354                        }
355                    } else {
356                        // No applicable tab stop — treat like a single space.
357                        x_cursor += space_w;
358                    }
359                    tok_idx += 1;
360                }
361
362                Token::Word(word, style, ls) => {
363                    let w = word_width(fonts, word, style.bold, style.italic, *ls);
364                    let gap = if pending.is_empty() { 0.0 } else { space_w };
365
366                    if x_cursor + gap + w <= max_width_mm {
367                        x_cursor += gap + w;
368                        pending.push((word.clone(), style.clone(), *ls));
369                        word_widths.push(w);
370                    } else if !pending.is_empty() {
371                        lines.push(build_line(
372                            &pending, &word_widths, max_width_mm, alignment,
373                            font_size, space_w, line_h, false,
374                        ));
375                        pending.clear();
376                        word_widths.clear();
377                        x_cursor = w;
378                        pending.push((word.clone(), style.clone(), *ls));
379                        word_widths.push(w);
380                    } else {
381                        // Oversized single word — force-add to prevent infinite loop.
382                        pending.push((word.clone(), style.clone(), *ls));
383                        word_widths.push(w);
384                        lines.push(build_line(
385                            &pending, &word_widths, max_width_mm, alignment,
386                            font_size, space_w, line_h, false,
387                        ));
388                        pending.clear();
389                        word_widths.clear();
390                        x_cursor = 0.0;
391                    }
392                    tok_idx += 1;
393                }
394            }
395        }
396
397        if !pending.is_empty() {
398            lines.push(build_line(
399                &pending, &word_widths, max_width_mm, alignment,
400                font_size, space_w, line_h, true,
401            ));
402        }
403
404        let total_height_mm = lines.len() as f64 * line_h;
405        LayoutResult { lines, total_height_mm }
406    }
407
408    /// Convenience wrapper: lay out a plain string with a uniform style.
409    pub fn layout_plain(
410        &self,
411        fonts: &FontRegistry,
412        text: &str,
413        max_width_mm: f64,
414        alignment: TextAlign,
415        font_size: f64,
416        style: AppliedStyle,
417    ) -> LayoutResult {
418        let run = TextRun { text: text.to_string(), style, letter_spacing_mm: 0.0, ..Default::default() };
419        self.layout_runs(fonts, &[run], max_width_mm, alignment, font_size, &[])
420    }
421
422    /// Layout with explicit line-breaking mode.
423    ///
424    /// When `mode` is [`LineBreakingMode::KnuthPlass`] and the `optimal_wrap`
425    /// feature is compiled, uses the Knuth-Plass algorithm for better paragraph
426    /// colour (inter-word spacing consistency).  Falls back to greedy otherwise.
427    pub fn layout_runs_with_mode(
428        &self,
429        fonts: &FontRegistry,
430        runs: &[TextRun],
431        max_width_mm: f64,
432        alignment: TextAlign,
433        font_size: f64,
434        tab_stops: &[TabStop],
435        mode: LineBreakingMode,
436    ) -> LayoutResult {
437        match mode {
438            LineBreakingMode::KnuthPlass => {
439                #[cfg(feature = "optimal_wrap")]
440                {
441                    return self.layout_runs_knuth_plass(
442                        fonts, runs, max_width_mm, alignment, font_size,
443                    );
444                }
445                #[cfg(not(feature = "optimal_wrap"))]
446                {
447                    let _ = mode;
448                }
449                self.layout_runs(fonts, runs, max_width_mm, alignment, font_size, tab_stops)
450            }
451            LineBreakingMode::Greedy => {
452                self.layout_runs(fonts, runs, max_width_mm, alignment, font_size, tab_stops)
453            }
454        }
455    }
456
457    /// Returns hyphenation break points (byte indices) for a word.
458    ///
459    /// Requires the `hyphenation` feature. Returns an empty `Vec` when the
460    /// feature is disabled or the word has fewer than 5 characters.
461    pub fn hyphenate_word(&self, word: &str) -> Vec<usize> {
462        #[cfg(feature = "hyphenation")]
463        {
464            use hyphenation::{Language, Load, Hyphenator};
465            if word.chars().count() < 5 {
466                return vec![];
467            }
468            static HYPHENATOR: std::sync::OnceLock<hyphenation::Standard> =
469                std::sync::OnceLock::new();
470            let h = HYPHENATOR.get_or_init(|| {
471                hyphenation::Standard::from_embedded(Language::Portuguese)
472                    .expect("Portuguese hyphenation dictionary is embedded")
473            });
474            h.hyphenate(word).breaks.to_vec()
475        }
476        #[cfg(not(feature = "hyphenation"))]
477        {
478            let _ = word;
479            vec![]
480        }
481    }
482
483    /// Knuth-Plass paragraph layout (feature `optimal_wrap` required).
484    #[cfg(feature = "optimal_wrap")]
485    fn layout_runs_knuth_plass(
486        &self,
487        fonts: &FontRegistry,
488        runs: &[TextRun],
489        max_width_mm: f64,
490        alignment: TextAlign,
491        font_size: f64,
492    ) -> LayoutResult {
493        let space_w = self.measure_text_mm(fonts, " ", font_size, false, false);
494        let line_h = self.line_height_mm(fonts, font_size);
495
496        // Flatten runs into (word_text, style, letter_spacing, width) tuples.
497        let mut words: Vec<(String, AppliedStyle, f64, f64)> = Vec::new();
498        for run in runs {
499            for word in run.text.split_whitespace() {
500                if word.is_empty() { continue; }
501                let base = fonts.measure_text_mm(word, &self.default_family, font_size, run.style.bold, run.style.italic);
502                let ls = run.letter_spacing_mm;
503                let n = word.chars().count();
504                let w = if n > 1 && ls > 0.0 { base + ls * (n - 1) as f64 } else { base };
505                words.push((word.to_string(), run.style.clone(), ls, w));
506            }
507        }
508
509        if words.is_empty() {
510            return LayoutResult { lines: vec![], total_height_mm: 0.0 };
511        }
512
513        let boxes: Vec<WordBox> = words.iter().map(|(_, _, _, w)| WordBox { width: *w }).collect();
514        let optimizer = KnuthPlassOptimizer::new(max_width_mm, space_w);
515        let breaks = optimizer.optimize(&boxes);
516
517        let mut lines: Vec<LineBox> = Vec::new();
518        let mut line_start = 0usize;
519
520        for (break_idx, &line_end) in breaks.iter().enumerate() {
521            let is_last = break_idx == breaks.len() - 1;
522            let line_words = &words[line_start..=line_end];
523            let word_widths: Vec<f64> = line_words.iter().map(|(_, _, _, w)| *w).collect();
524            let pending: Vec<(String, AppliedStyle, f64)> = line_words
525                .iter()
526                .map(|(t, s, ls, _)| (t.clone(), s.clone(), *ls))
527                .collect();
528            lines.push(build_line(
529                &pending, &word_widths, max_width_mm, alignment,
530                font_size, space_w, line_h, is_last,
531            ));
532            line_start = line_end + 1;
533        }
534
535        let total_height_mm = lines.len() as f64 * line_h;
536        LayoutResult { lines, total_height_mm }
537    }
538}
539
// ── Free function — avoids borrow-checker issues with closures inside impl ────
541
542#[allow(clippy::too_many_arguments)]
543fn build_line(
544    words: &[(String, AppliedStyle, f64)],
545    word_widths: &[f64],
546    max_width_mm: f64,
547    alignment: TextAlign,
548    font_size: f64,
549    space_w: f64,
550    line_h: f64,
551    is_last: bool,
552) -> LineBox {
553    let n = words.len();
554    let words_total: f64 = word_widths.iter().sum();
555    let non_empty = words.iter().filter(|(t, _, _)| !t.is_empty()).count();
556    let spaces_total = if non_empty > 1 { (non_empty - 1) as f64 * space_w } else { 0.0 };
557    let line_w = words_total + spaces_total;
558
559    let base_x = match alignment {
560        TextAlign::Center => ((max_width_mm - line_w) / 2.0).max(0.0),
561        TextAlign::Right => (max_width_mm - line_w).max(0.0),
562        _ => 0.0,
563    };
564
565    let inter_word = if alignment == TextAlign::Justify && !is_last && non_empty > 1 {
566        (max_width_mm - words_total) / (non_empty - 1) as f64
567    } else {
568        space_w
569    };
570
571    let mut segments = Vec::with_capacity(n);
572    let mut x = base_x;
573    let mut first_non_empty = true;
574
575    for ((text, style, ls), &word_w) in words.iter().zip(word_widths.iter()) {
576        if text.is_empty() {
577            // Zero-width placeholder from tab handling — skip without advancing.
578            segments.push(LineSegment {
579                text: String::new(),
580                x_offset_mm: x,
581                style: style.clone(),
582                font_size,
583                letter_spacing_mm: *ls,
584            });
585            continue;
586        }
587        if !first_non_empty {
588            x += inter_word;
589        }
590        first_non_empty = false;
591        segments.push(LineSegment {
592            text: text.clone(),
593            x_offset_mm: x,
594            style: style.clone(),
595            font_size,
596            letter_spacing_mm: *ls,
597        });
598        x += word_w;
599    }
600
601    LineBox {
602        segments,
603        height_mm: line_h,
604        width_mm: line_w,
605        alignment,
606    }
607}