1use pdfplumber_core::geometry::{BBox, Ctm, Point};
8use pdfplumber_core::painting::Color;
9use pdfplumber_core::text::{Char, TextDirection};
10
11use crate::font_metrics::FontMetrics;
12use crate::handler::CharEvent;
13
14pub fn char_from_event(
30 event: &CharEvent,
31 metrics: &FontMetrics,
32 page_height: f64,
33 stroking_color: Option<Color>,
34 non_stroking_color: Option<Color>,
35) -> Char {
36 let font_size = event.font_size;
37 let h_scaling = event.h_scaling;
38
39 let font_matrix = Ctm::new(font_size * h_scaling, 0.0, 0.0, font_size, 0.0, event.rise);
42 let tm = ctm_from_array(&event.text_matrix);
43 let ctm = ctm_from_array(&event.ctm);
44 let trm = font_matrix.concat(&tm).concat(&ctm);
45
46 let word_spacing = if event.char_code == 32 {
51 event.word_spacing
52 } else {
53 0.0
54 };
55 let w_norm = if font_size.abs() > f64::EPSILON {
56 event.displacement / 1000.0 + (event.char_spacing + word_spacing) / font_size
57 } else {
58 event.displacement / 1000.0
59 };
60
61 let ascent_norm = metrics.ascent() / 1000.0;
63 let descent_norm = metrics.descent() / 1000.0;
64
65 let corners = [
68 trm.transform_point(Point::new(0.0, descent_norm)),
69 trm.transform_point(Point::new(w_norm, descent_norm)),
70 trm.transform_point(Point::new(w_norm, ascent_norm)),
71 trm.transform_point(Point::new(0.0, ascent_norm)),
72 ];
73
74 let min_x = corners.iter().map(|p| p.x).fold(f64::INFINITY, f64::min);
76 let max_x = corners
77 .iter()
78 .map(|p| p.x)
79 .fold(f64::NEG_INFINITY, f64::max);
80 let min_y = corners.iter().map(|p| p.y).fold(f64::INFINITY, f64::min);
81 let max_y = corners
82 .iter()
83 .map(|p| p.y)
84 .fold(f64::NEG_INFINITY, f64::max);
85
86 let top = page_height - max_y;
88 let bottom = page_height - min_y;
89
90 let bbox = BBox::new(min_x, top, max_x, bottom);
91
92 let upright = trm.b.abs() < 1e-6 && trm.c.abs() < 1e-6;
94
95 let direction = if trm.a.abs() >= trm.b.abs() {
97 if trm.a >= 0.0 {
98 TextDirection::Ltr
99 } else {
100 TextDirection::Rtl
101 }
102 } else if trm.b > 0.0 {
103 TextDirection::Btt
104 } else {
105 TextDirection::Ttb
106 };
107
108 let text = event.unicode.clone().unwrap_or_else(|| {
110 char::from_u32(event.char_code)
111 .map(|c| c.to_string())
112 .unwrap_or_else(|| "\u{FFFD}".to_string())
113 });
114
115 Char {
116 text,
117 bbox,
118 fontname: event.font_name.clone(),
119 size: font_size,
120 doctop: top,
121 upright,
122 direction,
123 stroking_color,
124 non_stroking_color,
125 ctm: event.ctm,
126 char_code: event.char_code,
127 }
128}
129
130fn ctm_from_array(arr: &[f64; 6]) -> Ctm {
132 Ctm::new(arr[0], arr[1], arr[2], arr[3], arr[4], arr[5])
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    /// Page height passed to `char_from_event` in every test.
    const PAGE_HEIGHT: f64 = 792.0;

    /// Baseline event: glyph "A" (code 65) in 12-unit Helvetica, positioned at
    /// (72, 720) by the text matrix, identity CTM, displacement 667, and no
    /// extra spacing, scaling or rise.
    fn default_event() -> CharEvent {
        CharEvent {
            char_code: 65,
            unicode: Some(String::from("A")),
            font_name: String::from("Helvetica"),
            font_size: 12.0,
            text_matrix: [1.0, 0.0, 0.0, 1.0, 72.0, 720.0],
            ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            displacement: 667.0,
            char_spacing: 0.0,
            word_spacing: 0.0,
            h_scaling: 1.0,
            rise: 0.0,
        }
    }

    /// Metrics for a font containing only code 65.
    ///
    /// NOTE(review): the arguments are presumably (widths, first char, last
    /// char, missing width, ascent, descent, font bbox) — confirm against
    /// `FontMetrics::new`. The tests rely on ascent 750 and descent -250.
    fn default_metrics() -> FontMetrics {
        FontMetrics::new(vec![667.0], 65, 65, 600.0, 750.0, -250.0, None)
    }

    /// Metrics for a font containing only code 32, built like
    /// `default_metrics` but with a single width of 250.
    fn space_metrics() -> FontMetrics {
        FontMetrics::new(vec![250.0], 32, 32, 600.0, 750.0, -250.0, None)
    }

    /// Runs `char_from_event` with the shared page height and no colors.
    fn render_with(event: &CharEvent, metrics: &FontMetrics) -> Char {
        char_from_event(event, metrics, PAGE_HEIGHT, None, None)
    }

    /// Runs `char_from_event` with `default_metrics`, the shared page height
    /// and no colors.
    fn render(event: &CharEvent) -> Char {
        render_with(event, &default_metrics())
    }

    /// Asserts that `actual` is within 0.01 of `expected`.
    fn assert_approx(actual: f64, expected: f64, msg: &str) {
        let delta = (actual - expected).abs();
        assert!(delta < 0.01, "{msg}: expected {expected}, got {actual}");
    }

    /// Baseline glyph: x spans 72..80.004 (0.667 * 12 wide) and, after the
    /// y-flip, top = 792 - 729 = 63 and bottom = 792 - 717 = 75.
    #[test]
    fn simple_horizontal_text_bbox() {
        let glyph = char_from_event(
            &default_event(),
            &default_metrics(),
            PAGE_HEIGHT,
            None,
            Some(Color::black()),
        );

        for (actual, expected, label) in [
            (glyph.bbox.x0, 72.0, "x0"),
            (glyph.bbox.top, 63.0, "top"),
            (glyph.bbox.x1, 80.004, "x1"),
            (glyph.bbox.bottom, 75.0, "bottom"),
            (glyph.bbox.width(), 8.004, "width"),
            (glyph.bbox.height(), 12.0, "height"),
        ] {
            assert_approx(actual, expected, label);
        }

        assert_eq!(glyph.text, "A");
        assert_eq!(glyph.fontname, "Helvetica");
        assert_eq!(glyph.size, 12.0);
        assert!(glyph.upright);
        assert_eq!(glyph.direction, TextDirection::Ltr);
        assert_eq!(glyph.char_code, 65);
        assert_eq!(glyph.ctm, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]);
    }

    /// Doubling the font size doubles both the width and the height.
    #[test]
    fn scaled_text_bbox() {
        let glyph = render(&CharEvent {
            font_size: 24.0,
            ..default_event()
        });

        for (actual, expected, label) in [
            (glyph.bbox.x0, 72.0, "x0"),
            (glyph.bbox.top, 54.0, "top"),
            (glyph.bbox.x1, 88.008, "x1"),
            (glyph.bbox.bottom, 78.0, "bottom"),
            (glyph.bbox.width(), 16.008, "width"),
            (glyph.bbox.height(), 24.0, "height"),
        ] {
            assert_approx(actual, expected, label);
        }
        assert_eq!(glyph.size, 24.0);
    }

    /// A rise of 5 moves the box 5 units toward the top of the page without
    /// changing its size.
    #[test]
    fn text_with_rise_bbox() {
        let glyph = render(&CharEvent {
            rise: 5.0,
            ..default_event()
        });

        for (actual, expected, label) in [
            (glyph.bbox.x0, 72.0, "x0"),
            (glyph.bbox.top, 58.0, "top"),
            (glyph.bbox.x1, 80.004, "x1"),
            (glyph.bbox.bottom, 70.0, "bottom"),
            (glyph.bbox.height(), 12.0, "height"),
        ] {
            assert_approx(actual, expected, label);
        }
    }

    /// A 90-degree text matrix swaps the box's width and height, clears
    /// `upright`, and yields bottom-to-top direction.
    #[test]
    fn rotated_text_matrix_bbox() {
        let glyph = render(&CharEvent {
            text_matrix: [0.0, 1.0, -1.0, 0.0, 200.0, 400.0],
            ..default_event()
        });

        for (actual, expected, label) in [
            (glyph.bbox.x0, 191.0, "x0"),
            (glyph.bbox.top, 383.996, "top"),
            (glyph.bbox.x1, 203.0, "x1"),
            (glyph.bbox.bottom, 392.0, "bottom"),
            (glyph.bbox.width(), 12.0, "width"),
            (glyph.bbox.height(), 8.004, "height"),
        ] {
            assert_approx(actual, expected, label);
        }

        assert!(!glyph.upright);
        assert_eq!(glyph.direction, TextDirection::Btt);
    }

    /// A CTM translation of (50, 50) shifts the box right by 50 and, after the
    /// y-flip, 50 units closer to the top of the page.
    #[test]
    fn ctm_translation_bbox() {
        let glyph = render(&CharEvent {
            ctm: [1.0, 0.0, 0.0, 1.0, 50.0, 50.0],
            ..default_event()
        });

        for (actual, expected, label) in [
            (glyph.bbox.x0, 122.0, "x0"),
            (glyph.bbox.top, 13.0, "top"),
            (glyph.bbox.x1, 130.004, "x1"),
            (glyph.bbox.bottom, 25.0, "bottom"),
        ] {
            assert_approx(actual, expected, label);
        }
    }

    /// Character spacing is added to the glyph width (8.004 + 2.0).
    #[test]
    fn char_spacing_increases_width() {
        let glyph = render(&CharEvent {
            char_spacing: 2.0,
            ..default_event()
        });

        assert_approx(glyph.bbox.width(), 10.004, "width with char_spacing");
        assert_approx(glyph.bbox.height(), 12.0, "height");
    }

    /// Word spacing is added for code 32: 0.25 * 12 + 3.0 = 6.0.
    #[test]
    fn word_spacing_applied_for_space() {
        let space = CharEvent {
            char_code: 32,
            unicode: Some(String::from(" ")),
            displacement: 250.0,
            word_spacing: 3.0,
            ..default_event()
        };

        let glyph = render_with(&space, &space_metrics());

        assert_approx(glyph.bbox.width(), 6.0, "width with word_spacing");
    }

    /// Word spacing is ignored for any code other than 32.
    #[test]
    fn word_spacing_not_applied_for_non_space() {
        let glyph = render(&CharEvent {
            word_spacing: 3.0,
            ..default_event()
        });

        assert_approx(glyph.bbox.width(), 8.004, "width without word_spacing");
    }

    /// Unrotated text is reported as upright.
    #[test]
    fn upright_for_horizontal_text() {
        let glyph = render(&default_event());
        assert!(glyph.upright);
    }

    /// Text rotated by 90 degrees is not upright.
    #[test]
    fn not_upright_for_rotated_text() {
        let glyph = render(&CharEvent {
            text_matrix: [0.0, 1.0, -1.0, 0.0, 100.0, 500.0],
            ..default_event()
        });
        assert!(!glyph.upright);
    }

    /// An identity-oriented text matrix yields left-to-right direction.
    #[test]
    fn direction_ltr_for_normal_text() {
        let glyph = render(&default_event());
        assert_eq!(glyph.direction, TextDirection::Ltr);
    }

    /// A negative horizontal scale in the text matrix yields right-to-left.
    #[test]
    fn direction_rtl_for_mirrored_text() {
        let glyph = render(&CharEvent {
            text_matrix: [-1.0, 0.0, 0.0, 1.0, 300.0, 720.0],
            ..default_event()
        });
        assert_eq!(glyph.direction, TextDirection::Rtl);
    }

    /// A text x-axis pointing down the page yields top-to-bottom.
    #[test]
    fn direction_ttb_for_downward_text() {
        let glyph = render(&CharEvent {
            text_matrix: [0.0, -1.0, 1.0, 0.0, 100.0, 700.0],
            ..default_event()
        });
        assert_eq!(glyph.direction, TextDirection::Ttb);
    }

    /// A text x-axis pointing up the page yields bottom-to-top.
    #[test]
    fn direction_btt_for_upward_text() {
        let glyph = render(&CharEvent {
            text_matrix: [0.0, 1.0, -1.0, 0.0, 100.0, 100.0],
            ..default_event()
        });
        assert_eq!(glyph.direction, TextDirection::Btt);
    }

    /// The event's Unicode string is used verbatim when present.
    #[test]
    fn unicode_from_event() {
        let glyph = render(&CharEvent {
            unicode: Some(String::from("B")),
            char_code: 66,
            ..default_event()
        });
        assert_eq!(glyph.text, "B");
    }

    /// Without a Unicode string, the character code is interpreted as a
    /// Unicode scalar value.
    #[test]
    fn unicode_fallback_to_char_code() {
        let glyph = render(&CharEvent {
            unicode: None,
            char_code: 65,
            ..default_event()
        });
        assert_eq!(glyph.text, "A");
    }

    /// A code that is not a valid scalar value falls back to U+FFFD.
    #[test]
    fn unicode_fallback_replacement_for_invalid() {
        let glyph = render(&CharEvent {
            unicode: None,
            char_code: 0xFFFF_FFFF,
            ..default_event()
        });
        assert_eq!(glyph.text, "\u{FFFD}");
    }

    /// A baseline near y = 100 ends up near the bottom of the page once the
    /// vertical axis is flipped; `doctop` matches `top`.
    #[test]
    fn y_flip_converts_to_top_left_origin() {
        let glyph = render(&CharEvent {
            text_matrix: [1.0, 0.0, 0.0, 1.0, 72.0, 100.0],
            ..default_event()
        });

        assert_approx(glyph.bbox.top, 683.0, "top near page bottom");
        assert_approx(glyph.bbox.bottom, 695.0, "bottom near page bottom");
        assert_approx(glyph.doctop, 683.0, "doctop");
    }

    /// Both colors are stored on the result exactly as supplied.
    #[test]
    fn colors_passed_through() {
        let stroke = Some(Color::Rgb(1.0, 0.0, 0.0));
        let fill = Some(Color::Cmyk(0.0, 0.0, 0.0, 1.0));

        let glyph = char_from_event(
            &default_event(),
            &default_metrics(),
            PAGE_HEIGHT,
            stroke.clone(),
            fill.clone(),
        );

        assert_eq!(glyph.stroking_color, stroke);
        assert_eq!(glyph.non_stroking_color, fill);
    }

    /// Horizontal scaling of 0.5 halves the width and leaves the height alone.
    #[test]
    fn horizontal_scaling_affects_width() {
        let glyph = render(&CharEvent {
            h_scaling: 0.5,
            ..default_event()
        });

        assert_approx(glyph.bbox.width(), 4.002, "width at 50% h_scaling");
        assert_approx(glyph.bbox.height(), 12.0, "height at 50% h_scaling");
    }

    /// `FontMetrics::default_metrics()` still produces a 12-unit-high box for
    /// the baseline event.
    #[test]
    fn default_metrics_produce_reasonable_bbox() {
        let glyph = render_with(&default_event(), &FontMetrics::default_metrics());

        assert_approx(glyph.bbox.height(), 12.0, "height with default metrics");
        assert_approx(glyph.bbox.width(), 8.004, "width with default metrics");
    }

    /// A uniform 2x CTM doubles both dimensions of the box.
    #[test]
    fn ctm_scaling_affects_bbox() {
        let glyph = render(&CharEvent {
            text_matrix: [1.0, 0.0, 0.0, 1.0, 36.0, 360.0],
            ctm: [2.0, 0.0, 0.0, 2.0, 0.0, 0.0],
            ..default_event()
        });

        assert_approx(glyph.bbox.width(), 16.008, "width with 2x CTM");
        assert_approx(glyph.bbox.height(), 24.0, "height with 2x CTM");
    }

    /// A zero font size must not panic (the spacing division is skipped).
    #[test]
    fn zero_font_size_does_not_panic() {
        let glyph = render(&CharEvent {
            font_size: 0.0,
            ..default_event()
        });
        assert_eq!(glyph.size, 0.0);
    }

    /// For code 32 both spacings apply: 0.25 * 12 + 1.0 + 2.0 = 6.0.
    #[test]
    fn combined_spacing_for_space() {
        let space = CharEvent {
            char_code: 32,
            unicode: Some(String::from(" ")),
            displacement: 250.0,
            char_spacing: 1.0,
            word_spacing: 2.0,
            ..default_event()
        };

        let glyph = render_with(&space, &space_metrics());

        assert_approx(glyph.bbox.width(), 6.0, "width with combined spacing");
    }
}