Skip to main content

laser_pdf/text/
pieces.rs

1use std::{
2    borrow::{Borrow, Cow},
3    cell::Cell,
4    iter::Peekable,
5};
6
7use elsa::FrozenMap;
8use icu_properties::LineBreak;
9use icu_segmenter::LineBreakIteratorUtf8;
10
11use crate::fonts::{Font, GeneralMetrics, ShapedGlyph};
12
/// Lookup key of the text pieces cache: the text together with every parameter that
/// [`TextPiecesCache::pieces`] receives.
///
/// The text is a [`Cow`] so that a lookup can be made with a borrowed string while the key that
/// is stored in the cache owns its text. `PartialEq`, `Eq` and `Hash` are implemented by hand
/// because the `f32` fields implement neither `Eq` nor `Hash`.
#[derive(Debug, Clone)]
struct TextPiecesCacheKey<'a> {
    /// The text that was (or is about to be) split into pieces.
    text: Cow<'a, str>,
    /// The value returned by `Font::index` for the main font.
    font_index: usize,
    /// Font size as passed to `pieces`.
    size: f32,
    /// Color as passed to `pieces`; it is copied into every resulting piece.
    color: u32,
    /// Extra spacing per character as passed to `pieces`.
    extra_character_spacing: f32,
    /// Extra spacing per word as passed to `pieces`.
    extra_word_spacing: f32,
    /// Extra line height as passed to `pieces`.
    extra_line_height: f32,
}
22
23#[derive(Hash, PartialEq, Eq)]
24struct OwnedKey(TextPiecesCacheKey<'static>);
25
26impl<'a> Borrow<TextPiecesCacheKey<'a>> for OwnedKey {
27    fn borrow(&self) -> &TextPiecesCacheKey<'a> {
28        &self.0
29    }
30}
31
32impl<'a> PartialEq for TextPiecesCacheKey<'a> {
33    fn eq(&self, other: &Self) -> bool {
34        self.text == other.text
35            && self.font_index == other.font_index
36            && self.size.to_bits() == other.size.to_bits()
37            && self.color == other.color
38            && self.extra_character_spacing.to_bits() == other.extra_character_spacing.to_bits()
39            && self.extra_word_spacing.to_bits() == other.extra_word_spacing.to_bits()
40            && self.extra_line_height.to_bits() == other.extra_line_height.to_bits()
41    }
42}
43
44impl<'a> Eq for TextPiecesCacheKey<'a> {}
45
46impl<'a> std::hash::Hash for TextPiecesCacheKey<'a> {
47    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
48        self.text.hash(state);
49        self.font_index.hash(state);
50        self.size.to_bits().hash(state);
51        self.color.hash(state);
52        self.extra_character_spacing.to_bits().hash(state);
53        self.extra_word_spacing.to_bits().hash(state);
54        self.extra_line_height.to_bits().hash(state);
55    }
56}
57
/// A data structure that holds cached text pieces (shaped and unicode-segmented such that they
/// are ready for line breaking). This type gets passed around in the contexts and is needed by
/// the [crate::elements::text::Text] and [crate::elements::rich_text::RichText] elements.
/// Currently only [Self::new] is public for API stability reasons.
pub struct TextPiecesCache {
    /// Segmenter that yields the line break positions of a text.
    line_segmenter: icu_segmenter::LineSegmenter,
    /// Lookup from code point to its line break class; used to recognise characters that force
    /// a line break.
    line_break_map:
        icu_properties::maps::CodePointMapDataBorrowed<'static, icu_properties::LineBreak>,
    /// The pieces computed so far, keyed by the text and all parameters that went into them.
    /// Lookups and inserts both go through a shared reference to the map.
    cache: FrozenMap<OwnedKey, Vec<Piece>>,
    /// Scratch buffer the shaper writes into. It is taken out for the duration of a computation
    /// and put back empty afterwards so that its allocation can be reused.
    shape_buffer: Cell<Vec<(Option<usize>, ShapedGlyph)>>,
}
69
70impl TextPiecesCache {
71    pub fn new() -> Self {
72        TextPiecesCache {
73            line_segmenter: icu_segmenter::LineSegmenter::new_auto(),
74            line_break_map: icu_properties::maps::line_break(),
75            cache: FrozenMap::new(),
76            shape_buffer: Cell::new(Vec::new()),
77        }
78    }
79
80    pub(crate) fn pieces<'a, F: Font>(
81        &'a self,
82        text: &str,
83        font: &F,
84        size: f32,
85        color: u32,
86        extra_character_spacing: f32,
87        extra_word_spacing: f32,
88        extra_line_height: f32,
89    ) -> &'a [Piece] {
90        assert!(size.is_finite());
91        assert!(extra_character_spacing.is_finite());
92        assert!(extra_word_spacing.is_finite());
93        assert!(extra_line_height.is_finite());
94
95        let key = TextPiecesCacheKey {
96            text: Cow::Borrowed(text),
97            font_index: font.index(),
98            size,
99            color,
100            extra_character_spacing,
101            extra_word_spacing,
102            extra_line_height,
103        };
104
105        if let Some(value) = self.cache.get(&key) {
106            value
107        } else {
108            let shaped_hyphen = font.shape(super::HYPHEN, 0., 0.).next().unwrap();
109
110            let mut shaped = self.shape_buffer.take();
111            assert!(shaped.is_empty());
112
113            super::shaping::shape(
114                font,
115                font.fallback_fonts(),
116                None,
117                text,
118                extra_character_spacing / size,
119                extra_word_spacing / size,
120                &mut shaped,
121                0,
122            );
123
124            let segments = self.line_segmenter.segment_str(text).peekable();
125
126            let pieces = Pieces {
127                current: Some(0),
128                text,
129                shaped: shaped.iter(),
130                segments,
131                shaped_hyphen,
132                size,
133                color,
134                extra_line_height,
135                main_font: font,
136                main_font_metrics: font.general_metrics(),
137                fallback_fonts: font.fallback_fonts(),
138                line_break_map: &self.line_break_map,
139            }
140            .collect();
141
142            shaped.clear();
143            self.shape_buffer.set(shaped);
144
145            self.cache.insert(
146                OwnedKey(TextPiecesCacheKey {
147                    text: Cow::Owned(text.to_string()),
148                    ..key
149                }),
150                pieces,
151            )
152        }
153    }
154}
155
/// One unit of text between two possible line break positions, together with its shaped glyphs
/// and the measurements needed for line breaking.
pub struct Piece {
    /// The part of the source text this piece covers.
    pub text: String,

    /// The glyphs of this piece. The first tuple element is the index into the fallback fonts,
    /// or `None` for the main font. The glyphs' text ranges are relative to [`Self::text`].
    pub shaped: Vec<(Option<usize>, ShapedGlyph)>,

    /// The width of the main part of the piece. None means the piece consists only of whitespace.
    /// This is needed for line breaking to determine how to treat the piece if placed at the end
    /// of an overflowing line; a piece that consists only of whitespace can be placed there because
    /// trailing whitespace does not count towards the width of the line. It's not clear whether
    /// checking for zero would work for this as there might be fonts or specific shapings that
    /// contain characters with a width of zero but with a visible glyph. The `None` indicates that
    /// there are only glyphs in this piece that we count as whitespace.
    pub width: Option<f32>,

    /// The largest height above the baseline among the main font and the fonts of all glyphs in
    /// this piece, multiplied by the font size.
    pub height_above_baseline: f32,

    /// The largest height below the baseline among the main font and the fonts of all glyphs in
    /// this piece, multiplied by the font size, plus the extra line height.
    pub height_below_baseline: f32,

    /// The summed advance (multiplied by the font size) of the whitespace glyphs that come after
    /// the last glyph counted in [`Self::width`].
    pub trailing_whitespace_width: f32,

    /// Only applies when the piece is at the end of the line. Otherwise, it will not be counted
    /// towards the width and not displayed. It is set when the text of the piece ends with a
    /// soft hyphen (U+00AD).
    pub trailing_hyphen: Option<(Option<usize>, ShapedGlyph)>,

    /// Whether the piece contains a character that forces a line break (line break class
    /// mandatory break, carriage return, line feed or next line).
    pub mandatory_break_after: bool,

    /// The number of glyphs in [`Self::shaped`].
    pub glyph_count: usize,

    /// True if the piece has no glyphs at all or if its only glyph is a forced line break.
    pub empty: bool,

    /// The font size the piece was created with.
    pub size: f32,

    /// The color the piece was created with.
    pub color: u32,
}
182
/// Iterator that cuts a shaped text into [`Piece`]s at the positions reported by the line
/// segmenter.
pub struct Pieces<'a, 'b, 'c, F> {
    /// Byte offset into `text` at which the next piece starts, or `None` once the iterator is
    /// exhausted.
    current: Option<usize>,
    /// The complete text that is being split.
    text: &'a str,
    /// The shaped glyphs that have not been assigned to a piece yet.
    shaped: std::slice::Iter<'c, (Option<usize>, ShapedGlyph)>,
    /// The break positions from the line segmenter, used as byte offsets into `text`.
    segments: Peekable<LineBreakIteratorUtf8<'b, 'a>>,
    /// The font used for glyphs that carry no fallback font index.
    main_font: &'a F,
    /// Metrics of the main font; every piece is at least this high.
    main_font_metrics: GeneralMetrics,
    /// The fonts that the fallback font index of a glyph refers to.
    fallback_fonts: &'a [F],
    /// The glyph attached as trailing hyphen to pieces that end with a soft hyphen.
    shaped_hyphen: ShapedGlyph,
    /// Font size; widths and heights are multiplied by it.
    size: f32,
    /// Color that is copied into every piece.
    color: u32,
    /// Amount added to the height below the baseline of every piece.
    extra_line_height: f32,
    /// Lookup from code point to line break class, used to detect forced line breaks.
    line_break_map: &'a icu_properties::maps::CodePointMapDataBorrowed<'static, LineBreak>,
}
197
198impl<'a, 'b, 'c, F: Font> Iterator for Pieces<'a, 'b, 'c, F> {
199    type Item = Piece;
200
201    fn next(&mut self) -> Option<Self::Item> {
202        let mut shaped = self.shaped.clone();
203
204        let Some(current) = self.current else {
205            return None;
206        };
207
208        // TODO: Handle unsafe_to_break somewhere here. If unsafe_to_break is true when we would
209        // otherwise split pieces we should probably fuse them into one piece because that seems
210        // like the only reasonable thing to do.
211
212        let segment = self.segments.find(|&s| s != 0).unwrap_or_else(|| {
213            self.current = None;
214            self.text.len()
215        });
216
217        let mut iter = std::iter::from_fn({
218            let mut done = false;
219            let shaped = &mut shaped;
220            move || {
221                if done {
222                    return None;
223                }
224
225                let next = shaped.next()?;
226
227                if next.1.text_range.end >= segment {
228                    done = true;
229                }
230
231                Some(next)
232            }
233        })
234        .peekable();
235
236        let mut width = None;
237        let mut whitespace_width = 0.;
238        let mut glyph_count = 0;
239        let mut mandatory_break_after = false;
240
241        // A line and its the pieces is always at least as high as the main font. Otherwise empty
242        // lines pieces would have no height. We could special case the empty line case, but that
243        // would lead to the the possibility of an empty line being higher than a line that only has
244        // glyphs from a fallback font.
245        let mut height_above_baseline = self.main_font_metrics.height_above_baseline;
246        let mut height_below_baseline = self.main_font_metrics.height_below_baseline;
247
248        while let Some(glyph) = iter.next() {
249            glyph_count += 1;
250
251            // A space at the end of a line doesn't count towards the width.
252            if matches!(
253                &self.text[glyph.1.text_range.clone()],
254                " " | "\u{00A0}" | " "
255            ) {
256                whitespace_width += glyph.1.x_advance;
257            } else if matches!(
258                self.text[glyph.1.text_range.clone()]
259                    .chars()
260                    .next()
261                    .map(|c| self.line_break_map.get(c)),
262                Some(
263                    LineBreak::MandatoryBreak
264                        | LineBreak::CarriageReturn
265                        | LineBreak::LineFeed
266                        | LineBreak::NextLine,
267                )
268            ) {
269                // We probably can't break here because the font might generate two missing glyphs
270                // for a \r\n here.
271                mandatory_break_after = true;
272            } else {
273                *width.get_or_insert(0.) += whitespace_width + glyph.1.x_advance;
274                whitespace_width = 0.;
275            }
276
277            let font = glyph.0.map_or(self.main_font, |i| &self.fallback_fonts[i]);
278
279            let metrics = font.general_metrics();
280
281            height_above_baseline = height_above_baseline.max(metrics.height_above_baseline);
282            height_below_baseline = height_below_baseline.max(metrics.height_below_baseline);
283        }
284
285        let text = &self.text[current..segment];
286
287        // TODO: Handle the case of a soft hyphen followed by a space. Currently that just gets
288        // ignored.
289        let trailing_hyphen = text
290            .ends_with('\u{00AD}')
291            .then_some(self.shaped_hyphen.clone());
292
293        let piece = Piece {
294            text: text.to_string(),
295            shaped: self
296                .shaped
297                .by_ref()
298                .take(glyph_count)
299                .map(|&(f, ref g)| {
300                    (
301                        f,
302                        ShapedGlyph {
303                            text_range: (g.text_range.start - current)
304                                ..(g.text_range.end - current),
305                            ..g.clone()
306                        },
307                    )
308                })
309                .collect(),
310            width: width.map(|w| w * self.size),
311            height_above_baseline: height_above_baseline * self.size,
312            // TODO: Would it be better if this was only added to the below-baseline height of the
313            // main font?
314            height_below_baseline: height_below_baseline * self.size + self.extra_line_height,
315            trailing_whitespace_width: whitespace_width * self.size,
316            trailing_hyphen: trailing_hyphen.map(|glyph| (None, glyph)), // TODO: fallback if main font has no hyphen
317            mandatory_break_after,
318            glyph_count,
319
320            // TODO: This might not work for \r\n, but that depends on the shaping. We should
321            // proabably find a way to filter out newlines entirely so that they don't show up after
322            // line breaking (and maybe also don't get shaped?).
323            empty: glyph_count == 0 || (glyph_count == 1 && mandatory_break_after),
324
325            size: self.size,
326            color: self.color,
327        };
328
329        self.current = self.current.and(Some(segment));
330        self.shaped = shaped;
331
332        if self.segments.peek().is_none() && !mandatory_break_after {
333            self.current = None;
334        }
335
336        Some(piece)
337    }
338}
339
#[cfg(test)]
mod tests {
    use crate::{fonts::ShapedGlyph, text::pieces::Piece};

    use super::*;

    /// A font for which every character is a glyph of width one (zero for the soft hyphen) and
    /// whose glyph ids are the characters' scalar values.
    #[derive(Debug)]
    struct FakeFont;

    /// Shaping result of [`FakeFont`]: one glyph per character of the text.
    #[derive(Clone, Debug)]
    struct FakeShaped<'a> {
        inner: std::str::CharIndices<'a>,
    }

    impl<'a> Iterator for FakeShaped<'a> {
        type Item = ShapedGlyph;

        fn next(&mut self) -> Option<Self::Item> {
            self.inner.next().map(|(i, c)| {
                // we don't match newlines here because they produce the missing glyph which has
                // a non-zero width.
                let zero_width = matches!(c, '\u{00ad}');

                ShapedGlyph {
                    unsafe_to_break: false,
                    glyph_id: c as u32,
                    text_range: i..i + c.len_utf8(),
                    x_advance_font: if zero_width { 0. } else { 1. },
                    x_advance: if zero_width { 0. } else { 1. },
                    x_offset: 0.,
                    y_offset: 0.,
                    y_advance: 0.,
                }
            })
        }
    }

    impl Font for FakeFont {
        type Shaped<'a>
            = FakeShaped<'a>
        where
            Self: 'a;

        fn shape<'a>(&'a self, text: &'a str, _: f32, _: f32) -> Self::Shaped<'a> {
            FakeShaped {
                inner: text.char_indices(),
            }
        }

        fn index(&self) -> usize {
            0
        }

        fn encode(&self, _: &mut crate::Pdf, _: u32, _: &str) -> crate::fonts::EncodedGlyph {
            unreachable!()
        }

        fn resource_name(&self) -> pdf_writer::Name<'_> {
            unreachable!()
        }

        fn general_metrics(&self) -> crate::fonts::GeneralMetrics {
            crate::fonts::GeneralMetrics {
                height_above_baseline: 0.5,
                height_below_baseline: 0.5,
            }
        }

        fn fallback_fonts(&self) -> &[Self] {
            &[]
        }
    }

    /// Checks that the glyphs of `piece` spell out its text and that their text ranges point at
    /// the right characters, then returns `(text, width, trailing_whitespace_width,
    /// mandatory_break_after)` for comparison.
    fn collect_piece(piece: &Piece) -> (&str, Option<f32>, f32, bool) {
        let mut text = String::new();

        for (_, glyph) in &piece.shaped {
            // `FakeFont` stores the full scalar value in the glyph id. Converting it back through
            // `u8` would silently mangle every character above U+00FF.
            let character = char::from_u32(glyph.glyph_id)
                .expect("FakeFont glyph ids are Unicode scalar values");

            assert_eq!(
                character.to_string(),
                piece.text[glyph.text_range.clone()]
            );

            text.push(character);
        }

        assert_eq!(text, piece.text);

        (
            piece.text.as_str(),
            piece.width,
            piece.trailing_whitespace_width,
            piece.mandatory_break_after,
        )
    }

    /// Splits `text` with a fresh cache and [`FakeFont`] (size 1, no extra spacing) and asserts
    /// that the resulting pieces match `expected`.
    #[track_caller]
    fn assert_pieces(text: &str, expected: &[(&str, Option<f32>, f32, bool)]) {
        let cache = TextPiecesCache::new();
        let pieces = cache.pieces(text, &FakeFont, 1., 0, 0., 0., 0.);
        let pieces: Vec<_> = pieces.iter().map(collect_piece).collect();

        assert_eq!(pieces, expected);
    }

    #[test]
    fn test_empty() {
        assert_pieces("", &[("", None, 0., false)]);
    }

    #[test]
    fn test_one() {
        assert_pieces("abcde", &[("abcde", Some(5.), 0., false)]);
    }

    #[test]
    fn test_two() {
        assert_pieces(
            "deadbeef defaced",
            &[
                ("deadbeef ", Some(8.), 1., false),
                ("defaced", Some(7.), 0., false),
            ],
        );
    }

    #[test]
    fn test_three() {
        assert_pieces(
            "deadbeef defaced fart",
            &[
                ("deadbeef ", Some(8.), 1., false),
                ("defaced ", Some(7.), 1., false),
                ("fart", Some(4.), 0., false),
            ],
        );
    }

    #[test]
    fn test_just_newline() {
        assert_pieces("\n", &[("\n", None, 0., true), ("", None, 0., false)]);
    }

    #[test]
    fn test_surrounded_newline() {
        assert_pieces(
            "abc\ndef",
            &[("abc\n", Some(3.), 0., true), ("def", Some(3.), 0., false)],
        );
    }

    #[test]
    fn test_newline_at_start() {
        assert_pieces(
            "\nabc def",
            &[
                ("\n", None, 0., true),
                ("abc ", Some(3.), 1., false),
                ("def", Some(3.), 0., false),
            ],
        );
    }

    #[test]
    fn test_trailing_newline() {
        assert_pieces(
            "abc def\n",
            &[
                ("abc ", Some(3.), 1., false),
                ("def\n", Some(3.), 0., true),
                ("", None, 0., false),
            ],
        );
    }

    #[test]
    fn test_newline_after_space() {
        assert_pieces(
            "abc \ndef",
            &[("abc \n", Some(3.), 1., true), ("def", Some(3.), 0., false)],
        );
    }

    #[test]
    fn test_trailing_soft_hyphen() {
        assert_pieces("abc\u{ad}", &[("abc\u{ad}", Some(3.), 0., false)]);
    }

    #[test]
    fn test_trailing_soft_hyphen_and_space() {
        let text = "abc\u{ad} ";

        let cache = TextPiecesCache::new();
        let pieces = cache.pieces(text, &FakeFont, 1., 0, 0., 0., 0.);

        // This test additionally looks at the trailing hyphen, so it can't use `assert_pieces`.
        let pieces: Vec<_> = pieces
            .iter()
            .map(|p| {
                let collected = collect_piece(p);

                (
                    collected.0,
                    collected.1,
                    collected.2,
                    collected.3,
                    p.trailing_hyphen.as_ref().map(|h| h.1.x_advance),
                )
            })
            .collect();

        assert_eq!(&pieces, &[("abc\u{ad} ", Some(3.), 1., false, None)]);
    }

    #[test]
    fn test_soft_hyphen_after_space() {
        assert_pieces(
            " \u{ad}abc",
            &[
                (" ", None, 1., false),
                ("\u{ad}", Some(0.), 0., false),
                ("abc", Some(3.), 0., false),
            ],
        );
    }

    #[test]
    fn test_soft_hyphen_between_spaces() {
        assert_pieces(
            " \u{ad} ",
            &[(" ", None, 1., false), ("\u{ad} ", Some(0.), 1., false)],
        );
    }

    #[test]
    fn test_just_spaces() {
        assert_pieces("        ", &[("        ", None, 8., false)]);
    }

    #[test]
    fn test_mixed_whitespace() {
        assert_pieces(
            "    abc    \ndef  the\tjflkdsa",
            &[
                ("    ", None, 4., false),
                // It's somewhat unclear whether the trailing spaces should count toward the
                // width here.
                ("abc    \n", Some(3.), 4., true),
                ("def  ", Some(3.), 2., false),
                ("the\t", Some(4.), 0., false),
                ("jflkdsa", Some(7.), 0., false),
            ],
        );
    }
}