paragraph_breaker/
lib.rs

1use std::cmp::Ordering;
2
3const FITNESS_DEMERITS: u32  =  10_000;
4const FLAGGED_DEMERITS: u32  =  30_000;
5const LINE_PENALTY: u32     =     10;
6
7pub const INFINITE_PENALTY: i32  = 10_000;
8
9const NULL: usize = ::std::usize::MAX;
10
11#[derive(Debug, Copy, Clone)]
12pub enum Item<T> {
13    Box {
14        width: i32,
15        data: T,
16    },
17    Glue {
18        width: i32,
19        stretch: i32,
20        shrink: i32,
21    },
22    Penalty {
23        width: i32,
24        penalty: i32,
25        flagged: bool,
26    },
27}
28
29impl<T> Item<T> {
30    #[inline]
31    pub fn is_box(&self) -> bool {
32        matches!(*self, Item::Box { .. })
33    }
34
35    #[inline]
36    pub fn is_glue(&self) -> bool {
37        matches!(*self, Item::Glue { .. })
38    }
39
40    #[inline]
41    pub fn penalty(&self) -> i32 {
42        match *self {
43            Item::Penalty { penalty, .. } => penalty,
44            _ => 0,
45        }
46    }
47
48    #[inline]
49    pub fn flagged(&self) -> bool {
50        match *self {
51            Item::Penalty { flagged, .. } => flagged,
52            _ => false,
53        }
54    }
55}
56
57#[derive(Debug, Copy, Clone)]
58struct Node {
59    index: usize,
60    line: usize,
61    sums: Sums,
62    ratio: f32,
63    width: i32,
64    demerits: u32,
65    fitness_class: usize,
66    best_from: usize,
67    next: usize,
68}
69
70impl Default for Node {
71    fn default() -> Self {
72        Node {
73            index: 0,
74            line: 0,
75            sums: Sums::default(),
76            ratio: 0.0,
77            width: 0,
78            demerits: 0,
79            fitness_class: 1,
80            best_from: NULL,
81            next: NULL,
82        }
83    }
84}
85
86#[derive(Debug, Copy, Clone)]
87struct Sums {
88    width: i32,
89    stretch: i32,
90    shrink: i32,
91}
92
93impl Default for Sums {
94    fn default() -> Self {
95        Sums {
96            width: 0,
97            stretch: 0,
98            shrink: 0,
99        }
100    }
101}
102
103#[derive(Debug, Copy, Clone)]
104struct Candidate {
105    demerits: u32,
106    address: usize,
107    ratio: f32,
108    width: i32,
109}
110
111#[derive(Debug, Copy, Clone)]
112pub struct Breakpoint {
113    pub index: usize,
114    pub ratio: f32,
115    pub width: i32,
116}
117
118#[inline]
119fn ratio<T>(ideal_len: i32, sums: &Sums, previous_sums: &Sums, item: &Item<T>) -> (f32, i32) {
120    let mut actual_len = sums.width - previous_sums.width;
121
122    if let Item::Penalty { width, .. } = *item {
123        actual_len += width;
124    }
125
126    match actual_len.cmp(&ideal_len) {
127        Ordering::Less => {
128            let stretch = sums.stretch - previous_sums.stretch;
129            if stretch > 0 {
130                ((ideal_len - actual_len) as f32 / stretch as f32, actual_len)
131            } else {
132                (::std::f32::INFINITY, actual_len)
133            }
134        },
135        Ordering::Greater => {
136            let shrink = sums.shrink - previous_sums.shrink;
137            if shrink > 0 {
138                ((ideal_len - actual_len) as f32 / shrink as f32, actual_len)
139            } else {
140                (::std::f32::NEG_INFINITY, actual_len)
141            }
142        },
143        Ordering::Equal => (0.0, actual_len),
144    }
145}
146
147#[inline]
148fn fitness_class(ratio: f32) -> usize {
149    if ratio < -0.5 {
150        0
151    } else if ratio <= 0.5 {
152        1
153    } else if ratio <= 1.0 {
154        2
155    } else {
156        3
157    }
158}
159
160#[inline]
161fn badness(ratio: f32) -> u32 {
162    (100.0 * ratio.abs().powi(3)) as u32
163}
164
165#[inline]
166fn demerits<T>(ratio: f32, class: usize, active: &Node, item: &Item<T>, from_item: &Item<T>) -> u32 {
167    let mut d = (LINE_PENALTY + badness(ratio)).pow(2);
168
169    if item.penalty() >= 0 {
170        d = d.saturating_add(item.penalty().saturating_pow(2) as u32);
171    } else if item.penalty() != -INFINITE_PENALTY {
172        d = d.saturating_sub(item.penalty().saturating_pow(2) as u32);
173    }
174
175    if item.flagged() && from_item.flagged() {
176        d = d.saturating_add(FLAGGED_DEMERITS);
177    }
178
179    if (class as i32 - active.fitness_class as i32).abs() > 1 {
180        d = d.saturating_add(FITNESS_DEMERITS);
181    }
182
183    d = d.saturating_add(active.demerits);
184
185    d
186}
187
188#[inline]
189fn sums_after<T>(sums: &Sums, items: &[Item<T>], index: usize) -> Sums {
190    let mut sums = *sums;
191
192    for (i, item) in items.iter().enumerate().skip(index) {
193        match item {
194            Item::Box { .. } => break,
195            Item::Glue { width, stretch, shrink } => {
196                sums.width += width;
197                sums.stretch += stretch;
198                sums.shrink += shrink;
199            },
200            Item::Penalty { penalty, .. } if *penalty == -INFINITE_PENALTY && i > index => break,
201            _ => {},
202        }
203    }
204
205    sums
206}
207
208#[inline]
209fn explore<T>(nodes: &mut Vec<Node>, head: &mut usize, items: &[Item<T>], lengths: &[i32], sums: &Sums, threshold: f32, boundary: usize, index: usize) {
210    let mut current = *head;
211    let mut previous = NULL;
212
213    loop {
214        let mut min_demerits = ::std::u32::MAX;
215        let mut candidates = [Candidate { demerits: ::std::u32::MAX,
216                                          address: NULL,
217                                          ratio: 0.0,
218                                          width: 0 }; 4];
219        loop {
220            let next = nodes[current].next;
221            let line = nodes[current].line + 1;
222            let ideal_len = lengths[(line - 1).min(lengths.len() - 1)];
223            let (ratio, actual_len) = ratio(ideal_len, sums, &nodes[current].sums, &items[index]);
224
225            if ratio < -1.0 || items[index].penalty() == -INFINITE_PENALTY {
226                // Deactivate node.
227                if previous != NULL {
228                    nodes[previous].next = next;
229                } else {
230                    *head = next;
231                }
232            } else {
233                previous = current;
234            }
235
236            if ratio >= -1.0 && ratio <= threshold {
237                let class = fitness_class(ratio);
238                let d = demerits(ratio, class, &nodes[current],
239                                 &items[index], &items[nodes[current].index]);
240                if d < candidates[class].demerits {
241                    candidates[class].demerits = d;
242                    candidates[class].address = current;
243                    candidates[class].ratio = ratio;
244                    candidates[class].width = actual_len;
245                    if d < min_demerits {
246                        min_demerits = d;
247                    }
248                }
249            }
250
251            current = next;
252
253            if current == NULL {
254                break;
255            }
256
257            if nodes[current].line >= line && line < boundary {
258                break;
259            }
260        }
261
262        if min_demerits < ::std::u32::MAX {
263            for c in 0..candidates.len() {
264                if candidates[c].demerits < min_demerits.saturating_add(FITNESS_DEMERITS) {
265                    let sums_after = sums_after(sums, items, index);
266                    // Activate node.
267                    let new_addr = nodes.len();
268                    let node = Node {
269                        index,
270                        line: nodes[candidates[c].address].line + 1,
271                        fitness_class: c,
272                        sums: sums_after,
273                        ratio: candidates[c].ratio,
274                        width: candidates[c].width,
275                        demerits: candidates[c].demerits,
276                        best_from: candidates[c].address,
277                        next: current,
278                    };
279                    if previous != NULL {
280                        nodes[previous].next = new_addr;
281                    } else {
282                        *head = new_addr;
283                    }
284                    previous = new_addr;
285                    nodes.push(node);
286                }
287            }
288        }
289
290        if current == NULL {
291            break;
292        }
293    }
294}
295
296pub fn total_fit<T>(items: &[Item<T>], lengths: &[i32], mut threshold: f32, looseness: i32) -> Vec<Breakpoint> {
297    let boundary = if looseness != 0 {
298        ::std::usize::MAX
299    } else {
300        lengths.len().saturating_sub(2)
301    };
302
303    // Avoid overflows in the demerits computation.
304    threshold = threshold.min(8.6);
305
306    let mut nodes = Vec::with_capacity(items.len());
307    nodes.push(Node::default());
308
309    let mut head = 0;
310    let mut sums = Sums { width: 0, stretch: 0, shrink: 0 };
311
312    let mut start_index = 0;
313    while start_index < items.len() {
314        match items[start_index] {
315            Item::Box { .. } => break,
316            Item::Penalty { penalty, .. } if penalty == -INFINITE_PENALTY => break,
317            _ => start_index += 1,
318        }
319    }
320
321    for index in start_index..items.len() {
322        match items[index] {
323            Item::Box { width, .. } => sums.width += width,
324            Item::Glue { width, stretch, shrink } => {
325                if index > 0 && items[index-1].is_box() {
326                    explore(&mut nodes, &mut head, items, lengths, &sums, threshold, boundary, index);
327                }
328                sums.width += width;
329                sums.stretch += stretch;
330                sums.shrink += shrink;
331            },
332            Item::Penalty { penalty, .. } if penalty != INFINITE_PENALTY => {
333                explore(&mut nodes, &mut head, items, lengths, &sums, threshold, boundary, index);
334            },
335            _ => {},
336        }
337
338        if head == NULL {
339            break;
340        }
341    }
342
343    if head == NULL {
344        return Vec::new();
345    }
346
347    let mut current = head;
348    let mut chosen = NULL;
349    let mut d = ::std::u32::MAX;
350
351    while current != NULL {
352        if nodes[current].demerits < d {
353            d = nodes[current].demerits;
354            chosen = current;
355        }
356        current = nodes[current].next;
357    }
358
359    let line = nodes[chosen].line;
360
361    if looseness != 0 {
362        let mut current = head;
363        let mut drift = 0;
364
365        while current != NULL {
366            let delta = nodes[current].line as i32 - line as i32;
367            if (looseness <= delta && delta < drift) || (drift < delta && delta <= looseness) {
368                drift = delta;
369                d = nodes[current].demerits;
370                chosen = current;
371            } else if delta == drift && nodes[current].demerits < d {
372                d = nodes[current].demerits;
373                chosen = current;
374            }
375            current = nodes[current].next;
376        }
377    }
378
379    let mut result = Vec::new();
380
381    while chosen != NULL {
382        let node = nodes[chosen];
383        result.push(Breakpoint { index: node.index,
384                                 ratio: node.ratio,
385                                 width: node.width });
386        chosen = node.best_from;
387    }
388
389    result.pop();
390    result.reverse();
391    result
392}
393
394pub fn standard_fit<T>(items: &[Item<T>], lengths: &[i32], threshold: f32) -> Vec<Breakpoint> {
395    let mut result = Vec::new();
396    let mut index = 0;
397    let mut previous_index = index;
398    let mut line = 0;
399    let mut ideal_len = lengths[line.min(lengths.len() - 1)];
400    let mut sums = Sums::default();
401    let mut consecutive_flagged = 0;
402    let mut previous_sums = sums;
403    let mut current;
404
405    while index < items.len() {
406        match items[index] {
407            Item::Box { .. } => break,
408            Item::Penalty { penalty, .. } if penalty == -INFINITE_PENALTY => break,
409            _ => index += 1,
410        }
411    }
412
413    while index < items.len() {
414        current = &items[index];
415
416        match current {
417            Item::Box { width, .. } => {
418                sums.width += width;
419                if (sums.width - previous_sums.width) > ideal_len {
420                    let (mut r, mut w) = ratio(ideal_len, &sums, &previous_sums, current);
421
422                    if r < -1.0 {
423                        let high_index = index;
424                        let high_sums = sums;
425                        let mut boxes_count = 0;
426
427                        while index > previous_index {
428                            current = &items[index];
429
430                            match current {
431                                Item::Box { width, .. } => {
432                                    sums.width -= width;
433                                    boxes_count += 1;
434                                },
435                                Item::Penalty { penalty, flagged, .. } if !flagged && *penalty < INFINITE_PENALTY => {
436                                    break;
437                                },
438                                Item::Glue { width, stretch, shrink } => {
439                                    sums.width -= width;
440                                    sums.stretch -= stretch;
441                                    sums.shrink -= shrink;
442                                    if items[index - 1].is_box() {
443                                        break;
444                                    }
445                                },
446                                _ => (),
447                            }
448
449                            index -= 1;
450                        }
451
452                        if index == previous_index {
453                            if boxes_count == high_index - previous_index {
454                                return Vec::new();
455                            } else {
456                                index = high_index;
457                                sums = high_sums;
458                                while index > previous_index {
459                                    if let Item::Box { width, .. } = items[index] {
460                                        sums.width -= width;
461                                        index -= 1;
462                                    } else {
463                                        break;
464                                    }
465                                }
466                            }
467                        }
468
469                        let (r1, w1) = ratio(ideal_len, &sums, &previous_sums, current);
470                        r = r1; w = w1;
471
472                        if r > threshold {
473                            let low_index = index;
474                            let low_sums = sums;
475                            let low_ratio = r;
476                            let low_width = w;
477                            index = high_index;
478                            sums = high_sums;
479
480                            while index > low_index {
481                                current = &items[index];
482
483                                match current {
484                                    Item::Box { width, .. } => sums.width -= width,
485                                    Item::Penalty { penalty, flagged, .. } if *penalty < INFINITE_PENALTY && (!flagged || consecutive_flagged < 2) => {
486                                        let (r2, w2) = ratio(ideal_len, &sums, &previous_sums, current);
487                                        r = r2; w = w2;
488                                        if r >= -1.0 && r <= low_ratio {
489                                            break;
490                                        }
491                                    },
492                                    Item::Glue { width, stretch, shrink } => {
493                                        sums.width -= width;
494                                        sums.stretch -= stretch;
495                                        sums.shrink -= shrink;
496                                    },
497                                    _ => (),
498                                }
499
500                                index -= 1;
501                            }
502
503                            if index == low_index {
504                                sums = low_sums;
505                                r = low_ratio;
506                                w = low_width;
507                            }
508                        }
509                    } else {
510                        if index == items.len() - 1 || !items[index+1].is_glue() {
511                            index += 1;
512                            continue;
513                        }
514                        index += 1;
515                    }
516
517                    previous_index = index;
518                    previous_sums = sums;
519                    result.push(Breakpoint { index, ratio: r, width: w });
520
521                    if current.flagged() {
522                        consecutive_flagged += 1;
523                    } else {
524                        consecutive_flagged = 0;
525                    }
526
527                    index += 1;
528
529                    while index < items.len() {
530                        current = &items[index];
531                        match current {
532                            Item::Box { .. } => break,
533                            Item::Penalty { penalty, .. } if *penalty == -INFINITE_PENALTY => break,
534                            _ => index += 1,
535                         }
536                    }
537
538                    line += 1;
539                    ideal_len = lengths[line.min(lengths.len() - 1)];
540
541                    continue;
542                }
543            },
544            Item::Glue { width, stretch, shrink } => {
545                sums.width += width;
546                sums.stretch += stretch;
547                sums.shrink += shrink;
548            },
549            Item::Penalty { penalty, .. } if *penalty == -INFINITE_PENALTY => {
550                let (mut r, mut w) = ratio(ideal_len, &sums, &previous_sums, current);
551                if r < -1.0 {
552                    let mut i = index - 1;
553                    while i > previous_index {
554                        current = &items[i];
555                        match current {
556                            Item::Box { .. } => break,
557                            Item::Glue { width, stretch, shrink } => {
558                                sums.width -= width;
559                                sums.stretch -= stretch;
560                                sums.shrink -= shrink;
561                            },
562                            _ => (),
563                        }
564                        i -= 1;
565                    }
566                    let (r1, w1) = ratio(ideal_len, &sums, &previous_sums, current);
567                    r = r1; w = w1;
568                }
569
570                result.push(Breakpoint { index, ratio: r, width: w });
571                previous_index = index;
572                previous_sums = sums;
573                line += 1;
574                ideal_len = lengths[line.min(lengths.len() - 1)];
575            },
576            _ => (),
577        }
578
579        index += 1;
580    }
581
582    result
583}
584
585#[cfg(test)]
586mod tests {
587    use super::*;
588
589    const HYPHEN_PENALTY: i32 = 50;
590
591    // Excerpt from *The Frog Prince* by Brothers Grimm.
592    // Optional breaks are marked with soft hyphens, the only explicit hyphen is in *lime-tree*.
593    const FROG_PRINCE: &str = "In olden times when wish­ing still helped one, there lived a king whose daugh­ters were all beau­ti­ful; and the young­est was so beau­ti­ful that the sun it­self, which has seen so much, was aston­ished when­ever it shone in her face. Close by the king’s castle lay a great dark for­est, and un­der an old lime-tree in the for­est was a well, and when the day was very warm, the king’s child went out into the for­est and sat down by the side of the cool foun­tain; and when she was bored she took a golden ball, and threw it up on high and caught it; and this ball was her favor­ite play­thing.";
594
595    // Traditional Monotype characters widths (in *machine units* (1/18th of an em)),
596    // given by Donald Knuth in *Digital Typography*, p. 75.
597    const LOWERCASE_WIDTHS: [i32; 26] = [9, 10, 8, 10, 8, 6, 9, 10, 5, 6, 10, 5, 15,
598                                         10, 9, 10, 10, 7, 7, 7, 10, 9, 13, 10, 10, 8];
599    fn char_width(c: char) -> i32 {
600        match c {
601            'a' ..= 'z' => LOWERCASE_WIDTHS[c as usize - 'a' as usize],
602            'C' => 13,
603            'I' | '-' | '­' | ' ' => 6,
604            ',' | ';' | '.' | '’' => 5,
605            _ => 0,
606        }
607    }
608
609    fn glue_after<T>(c: char) -> Item<T> {
610        match c {
611            ',' => Item::Glue { width: 6, stretch: 4, shrink: 2 },
612            ';' => Item::Glue { width: 6, stretch: 4, shrink: 1 },
613            '.' => Item::Glue { width: 8, stretch: 6, shrink: 1 },
614            _ => Item::Glue { width: 6, stretch: 3, shrink: 2 },
615        }
616    }
617
618    fn make_items(text: &str) -> Vec<Item<()>> {
619        let mut result = Vec::new();
620        let mut buf = String::new();
621        let mut width = 0;
622        let mut last_c = '*';
623
624        for c in text.chars() {
625            if "- ­".find(c).is_some() {
626                if !buf.is_empty() {
627                    result.push(Item::Box { width, data: () });
628                    buf.clear();
629                    width = 0;
630                }
631            }
632
633            match c {
634                ' ' => result.push(glue_after(last_c)),
635                '-' => {
636                    result.push(Item::Box { width: char_width(c), data: () });
637                    result.push(Item::Penalty { width: 0,
638                                                penalty: HYPHEN_PENALTY,
639                                                flagged: true });
640                },
641                // Soft hyphen.
642                '­' => result.push(Item::Penalty { width: char_width(c),
643                                                   penalty: HYPHEN_PENALTY,
644                                                   flagged: true }),
645                _ => {
646                    buf.push(c);
647                    width += char_width(c);
648                },
649            }
650
651            last_c = c;
652        }
653
654        if !buf.is_empty() {
655            result.push(Item::Box { width, data: () });
656        }
657
658        result.extend_from_slice(&[Item::Penalty { penalty: INFINITE_PENALTY,  width: 0, flagged: false },
659                                   Item::Glue { width: 0, stretch: INFINITE_PENALTY, shrink: 0 },
660                                   Item::Penalty { penalty: -INFINITE_PENALTY,  width: 0, flagged: true }]);
661
662        result
663    }
664
665    macro_rules! pos {
666        ($x:expr) => ($x.iter().map(|x| x.index).collect::<Vec<usize>>());
667    }
668
669    #[test]
670    fn test_breakpoints() {
671        let items = make_items(FROG_PRINCE);
672        let narrow = total_fit(&items, &[372, 390], 1.0, 0);
673        let medium = total_fit(&items, &[482, 500], 1.0, 0);
674        let medium_tight = total_fit(&items, &[482, 500], 1.0, -1);
675        let medium_loose = total_fit(&items, &[482, 500], 2.5, 1);
676
677        // Knuth, Donald: Digital Typography, p. 81.
678        assert_eq!(pos!(narrow), vec![17, 37, 63, 83, 105, 129, 154, 174, 198, 220, 240, 262]);
679        // Ibid, p. 113.
680        assert_eq!(pos!(medium), vec![23, 51, 81, 107, 140, 168, 198, 224, 252, 262]);
681        assert_eq!(pos!(medium_tight), vec![25, 53, 83, 111, 146, 172, 204, 232, 262]);
682        assert_eq!(pos!(medium_loose), vec![21, 47, 77, 101, 129, 158, 182, 208, 234, 258, 262]);
683        // If the algorithm can't satisfy the constraints, the return value is empty.
684        let too_narrow = total_fit(&items, &[82, 100], 1.0, 0);
685        assert!(too_narrow.is_empty());
686
687        // *Standard* algorithm (an informal description is given ibid, p. 68, last paragraph).
688        let std_narrow = standard_fit(&items, &[372, 390], 1.0);
689        let std_medium = standard_fit(&items, &[482, 500], 1.0);
690        assert_eq!(pos!(std_narrow), vec![17, 39, 65, 85, 107, 131, 156, 176, 200, 220, 242, 262]);
691        assert_eq!(pos!(std_medium), vec![25, 53, 83, 111, 146, 172, 204, 232, 262]);
692
693        // If one of the boxes is larger than the line, the return value is empty.
694        let absurd_items = vec![Item::Box { width: 100, data: () },
695                                Item::Glue { width: 0, stretch: 0, shrink: 0 }];
696        let std_absurd = standard_fit(&absurd_items, &[90], 1.0);
697        assert!(std_absurd.is_empty());
698
699        // Ratios should be above or equal to -1.0.
700        let items = vec![Item::Box { width: 100, data: () },
701                         Item::Glue { width: 50, stretch: 20, shrink: 10 },
702                         Item::Box { width: 100, data: () },
703                         Item::Glue { width: 50, stretch: 20, shrink: 10 },
704                         Item::Box { width: 100, data: () },
705                         Item::Penalty { penalty: INFINITE_PENALTY,  width: 0, flagged: false },
706                         Item::Glue { width: 0, stretch: INFINITE_PENALTY, shrink: 0 },
707                         Item::Penalty { penalty: -INFINITE_PENALTY,  width: 0, flagged: true }];
708        let std_items = standard_fit(&items, &[300], 1.0);
709        assert_eq!(pos!(std_items), vec![3, 7]);
710    }
711}