1use std::collections::BTreeSet;
2
3use crate::document::format_coord;
4use crate::fonts::{BuiltinFont, FontMetrics, FontRef};
5use crate::truetype::TrueTypeFont;
6use crate::writer::escape_pdf_string;
7
/// Strategy for a word that is wider than the box it is being laid out in.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum WordBreak {
    /// Split the word at character boundaries without adding hyphens.
    /// This is the default.
    #[default]
    BreakAll,
    /// Split the word at character boundaries and append `-` to every piece
    /// except the last one.
    Hyphenate,
    /// Never split words; they reach line fitting unchanged.
    Normal,
}
19
/// Outcome of one `TextFlow::generate_content_ops` call.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FitResult {
    /// Every word of the flow has been placed; nothing remains.
    Stop,
    /// The box ran out of room while text still remains to be placed.
    BoxFull,
    /// Nothing was placed: the first line is taller than the box, or the
    /// first pending word is wider than the box.
    BoxEmpty,
}
30
/// Box that text is laid out into.
///
/// Layout starts at `(x, y - font_size)` and stops once a baseline would fall
/// below `y - height`, so `y` is the top edge and the box extends downwards.
// NOTE(review): units are presumably PDF user-space points — confirm with callers.
#[derive(Debug, Clone, Copy)]
pub struct Rect {
    /// Horizontal position at which every line starts.
    pub x: f64,
    /// Top edge of the box.
    pub y: f64,
    /// Maximum line width.
    pub width: f64,
    /// Vertical extent, measured downwards from `y`.
    pub height: f64,
}
46
/// Fonts selected by the content operators produced in one layout call.
#[derive(Debug, Default)]
pub struct UsedFonts {
    /// Built-in fonts that were selected.
    pub builtin: BTreeSet<BuiltinFont>,
    /// Indices (into the TrueType font slice passed to layout) of the
    /// TrueType fonts that were selected.
    pub truetype: BTreeSet<usize>,
}
55
/// Font and size applied to a run of text.
#[derive(Debug, Clone)]
pub struct TextStyle {
    /// Font used to measure and render the text.
    pub font: FontRef,
    /// Font size; also used as the distance from the top of a box to the
    /// first baseline.
    pub font_size: f64,
}
64
65impl Default for TextStyle {
66 fn default() -> Self {
67 TextStyle {
68 font: FontRef::Builtin(BuiltinFont::Helvetica),
69 font_size: 12.0,
70 }
71 }
72}
73
74impl TextStyle {
75 pub fn builtin(font: BuiltinFont, font_size: f64) -> Self {
77 TextStyle {
78 font: FontRef::Builtin(font),
79 font_size,
80 }
81 }
82}
83
/// A run of text together with the style it was added with.
#[derive(Debug, Clone)]
struct TextSpan {
    /// Raw text; may contain spaces and `\n` line breaks.
    text: String,
    /// Style applied to every word extracted from `text`.
    style: TextStyle,
}
90
/// Smallest unit handled by line fitting: a word, a piece of a broken word,
/// or a forced line break.
#[derive(Debug, Clone)]
struct Word {
    /// The characters of the word, or exactly `"\n"` for a forced line break.
    text: String,
    /// Style used to measure and render the word.
    style: TextStyle,
    /// Whether a space separates this word from the one before it.
    leading_space: bool,
}
99
100#[derive(Debug)]
103pub struct TextFlow {
104 spans: Vec<TextSpan>,
105 cursor: usize,
107 pub word_break: WordBreak,
109}
110
111impl TextFlow {
112 pub fn new() -> Self {
114 TextFlow {
115 spans: Vec::new(),
116 cursor: 0,
117 word_break: WordBreak::BreakAll,
118 }
119 }
120
121 pub fn add_text(&mut self, text: &str, style: &TextStyle) {
123 self.spans.push(TextSpan {
124 text: text.to_string(),
125 style: style.clone(),
126 });
127 }
128
129 pub fn is_finished(&self) -> bool {
131 let words = self.extract_words();
132 self.cursor >= words.len()
133 }
134
135 fn extract_words(&self) -> Vec<Word> {
138 let mut words = Vec::new();
139 let mut had_space = false;
140 for span in &self.spans {
141 let mut chars = span.text.chars().peekable();
142
143 while chars.peek().is_some() {
144 while chars.peek() == Some(&' ') {
146 had_space = true;
147 chars.next();
148 }
149
150 if chars.peek() == Some(&'\n') {
151 chars.next();
152 words.push(Word {
153 text: "\n".to_string(),
154 style: span.style.clone(),
155 leading_space: false,
156 });
157 had_space = false;
158 continue;
159 }
160
161 let mut word = String::new();
163 while let Some(&ch) = chars.peek() {
164 if ch == ' ' || ch == '\n' {
165 break;
166 }
167 word.push(ch);
168 chars.next();
169 }
170
171 if !word.is_empty() {
172 words.push(Word {
173 text: word,
174 style: span.style.clone(),
175 leading_space: had_space && !words.is_empty(),
176 });
177 had_space = false;
178 }
179 }
180 }
181 words
182 }
183
184 pub fn generate_content_ops(
194 &mut self,
195 rect: &Rect,
196 tt_fonts: &mut [TrueTypeFont],
197 ) -> (Vec<u8>, FitResult, UsedFonts) {
198 let empty = UsedFonts::default();
199 let raw_words = self.extract_words();
200 let words = if self.word_break != WordBreak::Normal {
201 break_wide_words(raw_words, rect.width, self.word_break, tt_fonts)
202 } else {
203 raw_words
204 };
205 if self.cursor >= words.len() {
206 return (Vec::new(), FitResult::Stop, empty);
207 }
208
209 let mut output = Vec::new();
210 let mut used = UsedFonts::default();
211 let first_word = &words[self.cursor];
212 let first_line_height = line_height_for(&first_word.style, tt_fonts);
213
214 if first_line_height > rect.height {
216 return (Vec::new(), FitResult::BoxEmpty, empty);
217 }
218
219 output.extend_from_slice(b"BT\n");
220
221 let first_baseline_y = rect.y - first_word.style.font_size;
224 let mut current_y = first_baseline_y;
225 let mut is_first_line = true;
226 let mut any_text_placed = false;
227
228 let mut active_font: Option<FontRef> = None;
230 let mut active_size: Option<f64> = None;
231
232 while self.cursor < words.len() {
233 let line_height = line_height_for(&words[self.cursor].style, tt_fonts);
234
235 if !is_first_line {
236 let next_y = current_y - line_height;
237 let bottom = rect.y - rect.height;
238 if next_y < bottom {
239 output.extend_from_slice(b"ET\n");
240 return (output, FitResult::BoxFull, used);
241 }
242 }
243
244 let line_start = self.cursor;
246 let mut line_width: f64 = 0.0;
247 let mut line_end = self.cursor;
248
249 while line_end < words.len() {
250 let word = &words[line_end];
251
252 if word.text == "\n" {
253 line_end += 1;
254 break;
255 }
256
257 let word_width = measure_word(&word.text, &word.style, tt_fonts);
258 let space_width = if word.leading_space {
259 measure_word(" ", &word.style, tt_fonts)
260 } else {
261 0.0
262 };
263
264 let total = line_width + space_width + word_width;
265 if total > rect.width && line_end > line_start {
266 break;
267 }
268 if total > rect.width && line_end == line_start {
269 if !any_text_placed {
270 output.extend_from_slice(b"ET\n");
271 return (Vec::new(), FitResult::BoxEmpty, UsedFonts::default());
272 }
273 line_end += 1;
274 break;
275 }
276
277 line_width = total;
278 line_end += 1;
279 }
280
281 if line_end == line_start {
282 break;
283 }
284
285 if is_first_line {
287 output.extend_from_slice(
288 format!(
289 "{} {} Td\n",
290 format_coord(rect.x),
291 format_coord(first_baseline_y),
292 )
293 .as_bytes(),
294 );
295 is_first_line = false;
296 } else {
297 output.extend_from_slice(
298 format!("0 {} Td\n", format_coord(-line_height),).as_bytes(),
299 );
300 current_y -= line_height;
301 }
302
303 for i in line_start..line_end {
305 let word = &words[i];
306 if word.text == "\n" {
307 continue;
308 }
309 let font_ref = word.style.font;
310 let font_size = word.style.font_size;
311
312 if active_font != Some(font_ref) || active_size != Some(font_size) {
314 let name = pdf_font_name(font_ref, tt_fonts);
315 output.extend_from_slice(
316 format!("/{} {} Tf\n", name, format_coord(font_size),).as_bytes(),
317 );
318 active_font = Some(font_ref);
319 active_size = Some(font_size);
320 record_font(&font_ref, &mut used);
321 }
322
323 let is_first_on_line = i == line_start;
324 let display_text = if word.leading_space && !is_first_on_line {
325 format!(" {}", word.text)
326 } else {
327 word.text.clone()
328 };
329
330 emit_text(&display_text, font_ref, tt_fonts, &mut output);
331 }
332
333 any_text_placed = true;
334 self.cursor = line_end;
335 }
336
337 output.extend_from_slice(b"ET\n");
338
339 let result = if self.cursor >= words.len() {
340 FitResult::Stop
341 } else {
342 FitResult::BoxFull
343 };
344 (output, result, used)
345 }
346}
347
348fn break_wide_words(
358 words: Vec<Word>,
359 max_width: f64,
360 mode: WordBreak,
361 tt_fonts: &[TrueTypeFont],
362) -> Vec<Word> {
363 let mut result: Vec<Word> = Vec::with_capacity(words.len());
364
365 for word in words {
366 if word.text == "\n" {
367 result.push(word);
368 continue;
369 }
370
371 let word_width = measure_word(&word.text, &word.style, tt_fonts);
372 if word_width <= max_width {
373 result.push(word);
374 continue;
375 }
376
377 let ts = TextStyle {
378 font: word.style.font,
379 font_size: word.style.font_size,
380 };
381 let pieces = break_word(&word.text, max_width, &ts, mode, tt_fonts);
382 let leading_space = word.leading_space;
383
384 for (i, piece) in pieces.into_iter().enumerate() {
385 result.push(Word {
386 text: piece,
387 style: word.style.clone(),
388 leading_space: i == 0 && leading_space,
389 });
390 }
391 }
392
393 result
394}
395
396pub(crate) fn break_word(
403 word: &str,
404 avail_width: f64,
405 style: &TextStyle,
406 mode: WordBreak,
407 tt_fonts: &[TrueTypeFont],
408) -> Vec<String> {
409 let hyphen_w = if mode == WordBreak::Hyphenate {
410 measure_word("-", style, tt_fonts)
411 } else {
412 0.0
413 };
414 let mut pieces: Vec<String> = Vec::new();
415 let mut remaining = word;
416
417 while !remaining.is_empty() {
418 let budget = avail_width - hyphen_w;
419 let mut prefix_end = 0;
420 let mut prefix_width = 0.0;
421
422 for ch in remaining.chars() {
423 let next_end = prefix_end + ch.len_utf8();
424 let ch_w = measure_word(&remaining[..next_end], style, tt_fonts) - prefix_width;
425 if prefix_width + ch_w > budget && prefix_end > 0 {
426 break;
427 }
428 prefix_width += ch_w;
429 prefix_end = next_end;
430 if prefix_width >= budget {
432 break;
433 }
434 }
435
436 if prefix_end == 0 {
438 prefix_end = remaining.chars().next().map_or(0, |c| c.len_utf8());
439 }
440
441 let is_last = prefix_end >= remaining.len();
442 let piece = if !is_last && mode == WordBreak::Hyphenate {
443 format!("{}-", &remaining[..prefix_end])
444 } else {
445 remaining[..prefix_end].to_string()
446 };
447 pieces.push(piece);
448 remaining = &remaining[prefix_end..];
449 }
450 pieces
451}
452
453pub(crate) fn line_height_for(style: &TextStyle, tt_fonts: &[TrueTypeFont]) -> f64 {
455 match style.font {
456 FontRef::Builtin(b) => FontMetrics::line_height(b, style.font_size),
457 FontRef::TrueType(id) => tt_fonts[id.0].line_height(style.font_size),
458 }
459}
460
461pub(crate) fn measure_word(text: &str, style: &TextStyle, tt_fonts: &[TrueTypeFont]) -> f64 {
463 match style.font {
464 FontRef::Builtin(b) => FontMetrics::measure_text(text, b, style.font_size),
465 FontRef::TrueType(id) => tt_fonts[id.0].measure_text(text, style.font_size),
466 }
467}
468
469fn pdf_font_name(font: FontRef, tt_fonts: &[TrueTypeFont]) -> String {
471 match font {
472 FontRef::Builtin(b) => b.pdf_name().to_string(),
473 FontRef::TrueType(id) => tt_fonts[id.0].pdf_name.clone(),
474 }
475}
476
477fn record_font(font: &FontRef, used: &mut UsedFonts) {
479 match font {
480 FontRef::Builtin(b) => {
481 used.builtin.insert(*b);
482 }
483 FontRef::TrueType(id) => {
484 used.truetype.insert(id.0);
485 }
486 }
487}
488
489fn emit_text(text: &str, font: FontRef, tt_fonts: &mut [TrueTypeFont], output: &mut Vec<u8>) {
492 match font {
493 FontRef::Builtin(_) => {
494 let escaped = escape_pdf_string(text);
495 output.extend_from_slice(format!("({}) Tj\n", escaped).as_bytes());
496 }
497 FontRef::TrueType(id) => {
498 let hex = tt_fonts[id.0].encode_text_hex(text);
499 output.extend_from_slice(format!("{} Tj\n", hex).as_bytes());
500 }
501 }
502}
503
#[cfg(test)]
mod break_word_tests {
    use super::*;
    use crate::fonts::BuiltinFont;

    /// Helvetica at size 12, the style used by every test.
    fn hv12() -> TextStyle {
        TextStyle::builtin(BuiltinFont::Helvetica, 12.0)
    }

    /// Width of `text` in the test style.
    fn w(text: &str) -> f64 {
        measure_word(text, &hv12(), &[])
    }

    /// Runs `break_word` with the test style and no TrueType fonts.
    fn split(word: &str, avail: f64, mode: WordBreak) -> Vec<String> {
        break_word(word, avail, &hv12(), mode, &[])
    }

    // ---- degenerate and fitting input ----

    #[test]
    fn empty_word_returns_empty_vec() {
        assert!(split("", 100.0, WordBreak::BreakAll).is_empty());
    }

    #[test]
    fn word_that_fits_returns_single_unchanged_piece() {
        let pieces = split("hello", w("hello") + 1.0, WordBreak::BreakAll);
        assert_eq!(pieces, vec!["hello"]);
    }

    #[test]
    fn word_exactly_at_boundary_is_not_broken() {
        let pieces = split("www", w("www"), WordBreak::BreakAll);
        assert_eq!(pieces, vec!["www"]);
    }

    // ---- BreakAll ----

    #[test]
    fn break_all_splits_evenly_on_char_boundary() {
        let pieces = split("wwwwww", w("www"), WordBreak::BreakAll);
        assert_eq!(pieces, vec!["www", "www"]);
    }

    #[test]
    fn break_all_produces_no_hyphens() {
        let pieces = split("wwww", w("ww"), WordBreak::BreakAll);
        assert!(
            pieces.iter().all(|piece| !piece.ends_with('-')),
            "BreakAll should not add hyphens, got: {:?}",
            pieces
        );
    }

    #[test]
    fn break_all_three_pieces() {
        let pieces = split("iiiiiiiii", w("iii"), WordBreak::BreakAll);
        assert_eq!(pieces, vec!["iii", "iii", "iii"]);
    }

    // ---- Hyphenate ----

    #[test]
    fn hyphenate_adds_hyphen_to_non_last_pieces() {
        let pieces = split("wwwwww", w("www"), WordBreak::Hyphenate);
        let (last, rest) = pieces.split_last().unwrap();
        for piece in rest {
            assert!(
                piece.ends_with('-'),
                "non-last piece should end with '-', got: {:?}",
                piece
            );
        }
        assert!(
            !last.ends_with('-'),
            "last piece must not end with '-', got: {:?}",
            last
        );
    }

    #[test]
    fn hyphenate_last_piece_never_ends_with_hyphen() {
        let pieces = split("wwwwwwww", w("www"), WordBreak::Hyphenate);
        assert!(pieces.len() > 1, "expected a split");
        assert!(!pieces.last().unwrap().ends_with('-'));
    }

    #[test]
    fn hyphenate_word_fitting_budget_produces_one_piece_without_hyphen() {
        let avail = w("hello") + w("-") + 1.0;
        let pieces = split("hello", avail, WordBreak::Hyphenate);
        assert_eq!(pieces, vec!["hello"]);
    }

    #[test]
    fn hyphenate_pieces_respect_hyphen_width_budget() {
        let avail = w("www");
        let pieces = split("wwwwwwwwww", avail, WordBreak::Hyphenate);
        for piece in &pieces {
            let piece_w = w(piece);
            assert!(
                piece_w <= avail + f64::EPSILON,
                "piece {:?} ({:.3}pt) exceeds avail ({:.3}pt)",
                piece,
                piece_w,
                avail
            );
        }
    }

    // ---- budgets smaller than one character ----

    #[test]
    fn single_char_wider_than_budget_still_emitted() {
        let pieces = split("iii", 1.0, WordBreak::BreakAll);
        assert_eq!(pieces, vec!["i", "i", "i"]);
    }

    #[test]
    fn single_char_word_with_tiny_budget_returns_that_char() {
        let pieces = split("w", 1.0, WordBreak::BreakAll);
        assert_eq!(pieces, vec!["w"]);
    }

    // ---- multi-byte characters ----

    #[test]
    fn multibyte_chars_split_on_codepoint_boundary() {
        let pieces = split("éàü", 1.0, WordBreak::BreakAll);
        assert!(pieces.iter().all(|piece| !piece.is_empty()));
        let rejoined: String = pieces.concat();
        assert_eq!(rejoined, "éàü");
    }
}