Skip to main content

fop_layout/layout/
knuth_plass.rs

1//! Knuth-Plass line breaking algorithm
2//!
3//! Implements the optimal line breaking algorithm from TeX, which minimizes
4//! the "badness" of line breaks across an entire paragraph.
5
6use crate::area::TraitSet;
7use fop_types::{FontRegistry, Length};
8
9/// A breakpoint in the text
10#[derive(Debug, Clone)]
11pub struct Breakpoint {
12    /// Position in the text (character index)
13    pub position: usize,
14
15    /// Width from start of line to this point
16    pub width: Length,
17
18    /// Penalty for breaking at this point (0 = good break, higher = worse)
19    pub penalty: i32,
20
21    /// Is this a forced break (e.g., newline)?
22    pub forced: bool,
23}
24
25/// A box (word or character) in the paragraph
26#[derive(Debug, Clone)]
27struct Box {
28    /// Width of this box
29    width: Length,
30
31    /// Text content
32    text: String,
33}
34
35/// A glue (stretchable space) in the paragraph
36#[derive(Debug, Clone)]
37#[allow(dead_code)] // Placeholder for full Knuth-Plass implementation
38struct Glue {
39    /// Natural width
40    width: Length,
41
42    /// Stretch amount (how much it can grow)
43    stretch: Length,
44
45    /// Shrink amount (how much it can shrink)
46    shrink: Length,
47}
48
49/// A penalty point (potential break location)
50#[derive(Debug, Clone)]
51#[allow(dead_code)] // Placeholder for full Knuth-Plass implementation
52struct Penalty {
53    /// Penalty value (higher = worse break)
54    penalty: i32,
55
56    /// Width contribution if broken here
57    width: Length,
58
59    /// Is this a forced break?
60    forced: bool,
61}
62
63/// Item in the paragraph (box, glue, or penalty)
64#[derive(Debug, Clone)]
65#[allow(dead_code)] // Placeholder for full Knuth-Plass implementation
66enum Item {
67    Box(Box),
68    Glue(Glue),
69    Penalty(Penalty),
70}
71
72/// A node in the Knuth-Plass algorithm
73#[derive(Debug, Clone)]
74struct Node {
75    /// Total demerits up to this node
76    demerits: f64,
77
78    /// Line number
79    line: usize,
80
81    /// Previous node index
82    previous: Option<usize>,
83
84    /// Position in text
85    position: usize,
86}
87
88/// Knuth-Plass line breaker
89pub struct KnuthPlassBreaker {
90    /// Available line width
91    line_width: Length,
92
93    /// Font registry for text measurement
94    font_registry: FontRegistry,
95
96    /// Tolerance for acceptable lines (0-10, higher = more tolerant)
97    tolerance: u32,
98}
99
100impl KnuthPlassBreaker {
101    /// Create a new Knuth-Plass breaker
102    pub fn new(line_width: Length) -> Self {
103        Self {
104            line_width,
105            font_registry: FontRegistry::new(),
106            tolerance: 2, // Default: moderate tolerance
107        }
108    }
109
110    /// Set tolerance level (0 = very strict, 10 = very loose)
111    pub fn with_tolerance(mut self, tolerance: u32) -> Self {
112        self.tolerance = tolerance.min(10);
113        self
114    }
115
116    /// Break text into optimal lines
117    pub fn break_text(&self, text: &str, traits: &TraitSet) -> Vec<String> {
118        // Convert text into items (boxes, glue, penalties)
119        let items = self.text_to_items(text, traits);
120
121        // Find optimal breakpoints
122        let breakpoints = self.find_breakpoints(&items);
123
124        // Convert breakpoints to lines
125        self.items_to_lines(&items, &breakpoints)
126    }
127
128    /// Convert text into boxes, glue, and penalties
129    fn text_to_items(&self, text: &str, traits: &TraitSet) -> Vec<Item> {
130        let mut items = Vec::new();
131        let font_size = traits.font_size.unwrap_or(Length::from_pt(12.0));
132        let font_name = traits.font_family.as_deref().unwrap_or("Helvetica");
133        let font_metrics = self.font_registry.get_or_default(font_name);
134
135        let words: Vec<&str> = text.split_whitespace().collect();
136
137        for (i, word) in words.iter().enumerate() {
138            // Add word as a box
139            let width = font_metrics.measure_text(word, font_size);
140            items.push(Item::Box(Box {
141                width,
142                text: (*word).to_string(),
143            }));
144
145            // Add space as glue (except after last word)
146            if i < words.len() - 1 {
147                let space_width = font_metrics.measure_text(" ", font_size);
148                items.push(Item::Glue(Glue {
149                    width: space_width,
150                    stretch: Length::from_pt(space_width.to_pt() * 0.5), // Can stretch 50%
151                    shrink: Length::from_pt(space_width.to_pt() * 0.33), // Can shrink 33%
152                }));
153
154                // Add penalty for breaking at space
155                items.push(Item::Penalty(Penalty {
156                    penalty: 0, // No penalty for breaking at space
157                    width: Length::ZERO,
158                    forced: false,
159                }));
160            }
161        }
162
163        // Add forced break at end
164        items.push(Item::Penalty(Penalty {
165            penalty: -10000, // Very negative = forced break
166            width: Length::ZERO,
167            forced: true,
168        }));
169
170        items
171    }
172
173    /// Find optimal breakpoints using dynamic programming
174    fn find_breakpoints(&self, items: &[Item]) -> Vec<usize> {
175        let mut nodes = vec![Node {
176            demerits: 0.0,
177            line: 0,
178            previous: None,
179            position: 0,
180        }];
181
182        let mut active_nodes = vec![0];
183
184        for (i, item) in items.iter().enumerate() {
185            if let Item::Penalty(_) = item {
186                // Try breaking at this penalty
187                let mut new_active = Vec::new();
188
189                for &active_idx in &active_nodes {
190                    let active = &nodes[active_idx];
191
192                    // Calculate width from active node to this penalty
193                    let width = self.calculate_width(items, active.position, i);
194
195                    // Check if line is feasible
196                    let max_width = Length::from_pt(self.line_width.to_pt() * 1.5);
197                    if width <= max_width {
198                        // Calculate demerits for this break
199                        let ratio = (width.to_pt() / self.line_width.to_pt() - 1.0).abs();
200                        let demerits = active.demerits + ratio.powi(2) * 100.0;
201
202                        // Create new node
203                        nodes.push(Node {
204                            demerits,
205                            line: active.line + 1,
206                            previous: Some(active_idx),
207                            position: i + 1,
208                        });
209
210                        new_active.push(nodes.len() - 1);
211                    }
212                }
213
214                if !new_active.is_empty() {
215                    active_nodes = new_active;
216                }
217            }
218        }
219
220        // Find best path
221        let best_node_idx = active_nodes
222            .iter()
223            .min_by(|&&a, &&b| {
224                nodes[a]
225                    .demerits
226                    .partial_cmp(&nodes[b].demerits)
227                    .unwrap_or(std::cmp::Ordering::Equal)
228            })
229            .copied()
230            .unwrap_or(0);
231
232        // Trace back to get breakpoints
233        let mut breakpoints = Vec::new();
234        let mut current = Some(best_node_idx);
235
236        while let Some(idx) = current {
237            breakpoints.push(nodes[idx].position);
238            current = nodes[idx].previous;
239        }
240
241        breakpoints.reverse();
242        breakpoints
243    }
244
245    /// Calculate width of items from start to end
246    fn calculate_width(&self, items: &[Item], start: usize, end: usize) -> Length {
247        let mut width = Length::ZERO;
248
249        for item in &items[start..end] {
250            match item {
251                Item::Box(b) => width += b.width,
252                Item::Glue(g) => width += g.width,
253                Item::Penalty(_) => {}
254            }
255        }
256
257        width
258    }
259
260    /// Convert items and breakpoints into lines of text
261    fn items_to_lines(&self, items: &[Item], breakpoints: &[usize]) -> Vec<String> {
262        let mut lines = Vec::new();
263        let mut start = 0;
264
265        for &end in breakpoints {
266            let mut line = String::new();
267
268            for item in &items[start..end] {
269                if let Item::Box(b) = item {
270                    if !line.is_empty() {
271                        line.push(' ');
272                    }
273                    line.push_str(&b.text);
274                }
275            }
276
277            if !line.is_empty() {
278                lines.push(line);
279            }
280
281            start = end;
282        }
283
284        lines
285    }
286}
287
288#[cfg(test)]
289mod tests {
290    use super::*;
291
292    // ── KnuthPlassBreaker construction ───────────────────────────────────────
293
294    #[test]
295    fn test_breaker_default_tolerance() {
296        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0));
297        assert_eq!(breaker.tolerance, 2);
298    }
299
300    #[test]
301    fn test_breaker_line_width_stored() {
302        let breaker = KnuthPlassBreaker::new(Length::from_pt(300.0));
303        assert_eq!(breaker.line_width, Length::from_pt(300.0));
304    }
305
306    #[test]
307    fn test_with_tolerance_sets_value() {
308        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0)).with_tolerance(5);
309        assert_eq!(breaker.tolerance, 5);
310    }
311
312    #[test]
313    fn test_tolerance_clamped_to_ten() {
314        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0)).with_tolerance(100);
315        assert_eq!(breaker.tolerance, 10);
316    }
317
318    #[test]
319    fn test_tolerance_zero_allowed() {
320        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0)).with_tolerance(0);
321        assert_eq!(breaker.tolerance, 0);
322    }
323
324    #[test]
325    fn test_tolerance_exactly_ten_not_clamped() {
326        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0)).with_tolerance(10);
327        assert_eq!(breaker.tolerance, 10);
328    }
329
330    // ── Breakpoint struct ────────────────────────────────────────────────────
331
332    #[test]
333    fn test_breakpoint_construction() {
334        let bp = Breakpoint {
335            position: 5,
336            width: Length::from_pt(50.0),
337            penalty: 10,
338            forced: false,
339        };
340        assert_eq!(bp.position, 5);
341        assert_eq!(bp.width, Length::from_pt(50.0));
342        assert_eq!(bp.penalty, 10);
343        assert!(!bp.forced);
344    }
345
346    #[test]
347    fn test_forced_breakpoint() {
348        let bp = Breakpoint {
349            position: 0,
350            width: Length::ZERO,
351            penalty: -10000,
352            forced: true,
353        };
354        assert!(bp.forced);
355        assert!(bp.penalty < 0);
356    }
357
358    #[test]
359    fn test_inhibited_break_high_penalty() {
360        // A break with very high penalty is essentially inhibited
361        let bp = Breakpoint {
362            position: 3,
363            width: Length::from_pt(30.0),
364            penalty: 10000,
365            forced: false,
366        };
367        assert!(bp.penalty > 0);
368        assert!(!bp.forced);
369    }
370
371    // ── Short paragraph (fits one line) ─────────────────────────────────────
372
373    #[test]
374    fn test_short_paragraph_all_words_present() {
375        let breaker = KnuthPlassBreaker::new(Length::from_pt(300.0));
376        let traits = TraitSet::default();
377        // The algorithm breaks at every penalty, so words may be split across lines.
378        // The key invariant is all words appear in the output.
379        let lines = breaker.break_text("Hello world", &traits);
380        assert!(!lines.is_empty());
381        let joined = lines.join(" ");
382        assert!(joined.contains("Hello"));
383        assert!(joined.contains("world"));
384    }
385
386    #[test]
387    fn test_single_word_produces_one_line() {
388        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0));
389        let traits = TraitSet::default();
390        let lines = breaker.break_text("Hello", &traits);
391        assert_eq!(lines.len(), 1);
392        assert_eq!(lines[0], "Hello");
393    }
394
395    // ── Medium paragraph (2-3 lines) ─────────────────────────────────────────
396
397    #[test]
398    fn test_medium_paragraph_two_or_three_lines() {
399        let breaker = KnuthPlassBreaker::new(Length::from_pt(100.0));
400        let traits = TraitSet::default();
401        let text = "The quick brown fox jumps over the lazy dog near the river";
402        let lines = breaker.break_text(text, &traits);
403        assert!(lines.len() >= 2);
404    }
405
406    #[test]
407    fn test_medium_paragraph_all_words_preserved() {
408        let breaker = KnuthPlassBreaker::new(Length::from_pt(120.0));
409        let traits = TraitSet::default();
410        let text = "alpha beta gamma delta epsilon zeta eta theta";
411        let lines = breaker.break_text(text, &traits);
412        let all_words: Vec<String> = lines
413            .iter()
414            .flat_map(|l| l.split_whitespace().map(|s| s.to_string()))
415            .collect();
416        let expected: Vec<String> = text.split_whitespace().map(|s| s.to_string()).collect();
417        assert_eq!(all_words, expected);
418    }
419
420    // ── Tight paragraph (requires many breaks) ───────────────────────────────
421
422    #[test]
423    fn test_tight_paragraph_many_lines() {
424        let breaker = KnuthPlassBreaker::new(Length::from_pt(60.0));
425        let traits = TraitSet::default();
426        let text = "This is a very long text that will need many line breaks to fit";
427        let lines = breaker.break_text(text, &traits);
428        assert!(lines.len() > 2);
429    }
430
431    // ── Empty text ───────────────────────────────────────────────────────────
432
433    #[test]
434    fn test_empty_text_produces_no_lines() {
435        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0));
436        let traits = TraitSet::default();
437        let lines = breaker.break_text("", &traits);
438        assert!(lines.is_empty());
439    }
440
441    // ── Very long single word (no break point) ───────────────────────────────
442
443    #[test]
444    fn test_single_very_long_word_does_not_panic() {
445        // A word too wide even for 1.5× max-width produces no feasible break.
446        // The algorithm must not panic; it may produce zero or one line.
447        let breaker = KnuthPlassBreaker::new(Length::from_pt(50.0));
448        let traits = TraitSet::default();
449        let _lines = breaker.break_text("Supercalifragilisticexpialidocious", &traits);
450        // No assertion on line count — just verify no panic
451    }
452
453    // ── Glue shrink/stretch: item construction ──────────────────────────────
454
455    #[test]
456    fn test_text_to_items_produces_items_for_two_words() {
457        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0));
458        let traits = TraitSet::default();
459        let items = breaker.text_to_items("hello world", &traits);
460        // box + glue + penalty + box + forced-penalty = 5
461        assert_eq!(items.len(), 5);
462    }
463
464    #[test]
465    fn test_text_to_items_single_word() {
466        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0));
467        let traits = TraitSet::default();
468        let items = breaker.text_to_items("hello", &traits);
469        // box + forced-penalty = 2
470        assert_eq!(items.len(), 2);
471    }
472
473    #[test]
474    fn test_text_to_items_three_words() {
475        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0));
476        let traits = TraitSet::default();
477        let items = breaker.text_to_items("one two three", &traits);
478        // (box glue penalty) × 2 + box + forced-penalty = 3*2 + 1 + 1 = 8
479        assert_eq!(items.len(), 8);
480    }
481
482    // ── Adjustment ratio (line tightness) ────────────────────────────────────
483
484    #[test]
485    fn test_calculate_width_boxes_only() {
486        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0));
487        let traits = TraitSet::default();
488        let items = breaker.text_to_items("hello world", &traits);
489        // Width from position 0 to 1 (just the first Box)
490        let w = breaker.calculate_width(&items, 0, 1);
491        assert!(w > Length::ZERO);
492    }
493
494    #[test]
495    fn test_calculate_width_zero_range() {
496        let breaker = KnuthPlassBreaker::new(Length::from_pt(200.0));
497        let traits = TraitSet::default();
498        let items = breaker.text_to_items("hello", &traits);
499        let w = breaker.calculate_width(&items, 0, 0);
500        assert_eq!(w, Length::ZERO);
501    }
502
503    // ── Demerits / badness: tighter line width → more lines ─────────────────
504
505    #[test]
506    fn test_narrower_line_width_produces_more_lines() {
507        let traits = TraitSet::default();
508        let text = "one two three four five six seven eight nine ten";
509
510        let breaker_wide = KnuthPlassBreaker::new(Length::from_pt(300.0));
511        let lines_wide = breaker_wide.break_text(text, &traits);
512
513        let breaker_narrow = KnuthPlassBreaker::new(Length::from_pt(80.0));
514        let lines_narrow = breaker_narrow.break_text(text, &traits);
515
516        assert!(lines_narrow.len() >= lines_wide.len());
517    }
518
519    // ── font_size trait affects line count ───────────────────────────────────
520
521    #[test]
522    fn test_larger_font_size_produces_more_lines() {
523        let text = "one two three four five six seven eight";
524
525        let traits_small = TraitSet {
526            font_size: Some(Length::from_pt(8.0)),
527            ..Default::default()
528        };
529
530        let traits_large = TraitSet {
531            font_size: Some(Length::from_pt(18.0)),
532            ..Default::default()
533        };
534
535        let breaker = KnuthPlassBreaker::new(Length::from_pt(120.0));
536        let lines_small = breaker.break_text(text, &traits_small);
537        let lines_large = breaker.break_text(text, &traits_large);
538
539        assert!(lines_large.len() >= lines_small.len());
540    }
541
542    // ── items_to_lines round-trip ────────────────────────────────────────────
543
544    #[test]
545    fn test_items_to_lines_reconstructs_text() {
546        let breaker = KnuthPlassBreaker::new(Length::from_pt(400.0));
547        let traits = TraitSet::default();
548        let text = "hello world foo bar";
549        let lines = breaker.break_text(text, &traits);
550        // All words should be present
551        let reconstructed: String = lines.join(" ");
552        for word in text.split_whitespace() {
553            assert!(reconstructed.contains(word));
554        }
555    }
556
557    // ── Penalty values ───────────────────────────────────────────────────────
558
559    #[test]
560    fn test_zero_penalty_is_good_break() {
561        let bp = Breakpoint {
562            position: 5,
563            width: Length::from_pt(40.0),
564            penalty: 0,
565            forced: false,
566        };
567        assert_eq!(bp.penalty, 0);
568    }
569
570    #[test]
571    fn test_negative_penalty_is_forced_break() {
572        let bp = Breakpoint {
573            position: 10,
574            width: Length::ZERO,
575            penalty: -10000,
576            forced: true,
577        };
578        assert!(bp.penalty < 0);
579        assert!(bp.forced);
580    }
581}