1use pdfplumber_core::geometry::{BBox, Ctm, Point};
8use pdfplumber_core::painting::Color;
9use pdfplumber_core::text::{Char, TextDirection};
10
11use crate::font_metrics::FontMetrics;
12use crate::handler::CharEvent;
13
14pub fn char_from_event(
30 event: &CharEvent,
31 metrics: &FontMetrics,
32 page_height: f64,
33 stroking_color: Option<Color>,
34 non_stroking_color: Option<Color>,
35) -> Char {
36 let font_size = event.font_size;
37 let h_scaling = event.h_scaling;
38
39 let font_matrix = Ctm::new(font_size * h_scaling, 0.0, 0.0, font_size, 0.0, event.rise);
42 let tm = ctm_from_array(&event.text_matrix);
43 let ctm = ctm_from_array(&event.ctm);
44 let trm = font_matrix.concat(&tm).concat(&ctm);
45
46 let word_spacing = if event.char_code == 32 {
51 event.word_spacing
52 } else {
53 0.0
54 };
55 let w_norm = if font_size.abs() > f64::EPSILON {
56 event.displacement / 1000.0 + (event.char_spacing + word_spacing) / font_size
57 } else {
58 event.displacement / 1000.0
59 };
60
61 let ascent_norm = metrics.ascent() / 1000.0;
63 let descent_norm = metrics.descent() / 1000.0;
64
65 let corners = [
68 trm.transform_point(Point::new(0.0, descent_norm)),
69 trm.transform_point(Point::new(w_norm, descent_norm)),
70 trm.transform_point(Point::new(w_norm, ascent_norm)),
71 trm.transform_point(Point::new(0.0, ascent_norm)),
72 ];
73
74 let min_x = corners.iter().map(|p| p.x).fold(f64::INFINITY, f64::min);
76 let max_x = corners
77 .iter()
78 .map(|p| p.x)
79 .fold(f64::NEG_INFINITY, f64::max);
80 let min_y = corners.iter().map(|p| p.y).fold(f64::INFINITY, f64::min);
81 let max_y = corners
82 .iter()
83 .map(|p| p.y)
84 .fold(f64::NEG_INFINITY, f64::max);
85
86 let top = page_height - max_y;
88 let bottom = page_height - min_y;
89
90 let bbox = BBox::new(min_x, top, max_x, bottom);
91
92 let upright = trm.b.abs() < 1e-6 && trm.c.abs() < 1e-6;
94
95 let direction = if trm.a.abs() >= trm.b.abs() {
97 if trm.a >= 0.0 {
98 TextDirection::Ltr
99 } else {
100 TextDirection::Rtl
101 }
102 } else if trm.b > 0.0 {
103 TextDirection::Btt
104 } else {
105 TextDirection::Ttb
106 };
107
108 let text = event.unicode.clone().unwrap_or_else(|| {
110 char::from_u32(event.char_code)
111 .map(|c| c.to_string())
112 .unwrap_or_else(|| "\u{FFFD}".to_string())
113 });
114
115 Char {
116 text,
117 bbox,
118 fontname: event.font_name.clone(),
119 size: font_size,
120 doctop: top,
121 upright,
122 direction,
123 stroking_color,
124 non_stroking_color,
125 ctm: event.ctm,
126 char_code: event.char_code,
127 mcid: None,
128 tag: None,
129 }
130}
131
132fn ctm_from_array(arr: &[f64; 6]) -> Ctm {
134 Ctm::new(arr[0], arr[1], arr[2], arr[3], arr[4], arr[5])
135}
136
#[cfg(test)]
mod tests {
    use super::*;

    /// Page height shared by every test; bbox `top`/`bottom` are derived from it.
    const PAGE_HEIGHT: f64 = 792.0;

    /// Baseline event: "A" (code 65) in 12pt Helvetica with the text origin at
    /// (72, 720), identity CTM, no spacing, no rise and `h_scaling` of 1.
    fn default_event() -> CharEvent {
        CharEvent {
            char_code: 65,
            unicode: Some("A".to_string()),
            font_name: "Helvetica".to_string(),
            font_size: 12.0,
            text_matrix: [1.0, 0.0, 0.0, 1.0, 72.0, 720.0],
            ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            displacement: 667.0,
            char_spacing: 0.0,
            word_spacing: 0.0,
            h_scaling: 1.0,
            rise: 0.0,
        }
    }

    /// Metrics for the baseline "A": ascent 750 and descent -250, so the glyph
    /// box is exactly one font size tall.
    fn default_metrics() -> FontMetrics {
        FontMetrics::new(vec![667.0], 65, 65, 600.0, 750.0, -250.0, None)
    }

    /// Metrics for a lone space glyph (code 32) with the same ascent/descent.
    fn space_metrics() -> FontMetrics {
        FontMetrics::new(vec![250.0], 32, 32, 600.0, 750.0, -250.0, None)
    }

    /// Runs `char_from_event` with the default metrics, the test page height
    /// and no colors.
    fn extract(event: &CharEvent) -> Char {
        char_from_event(event, &default_metrics(), PAGE_HEIGHT, None, None)
    }

    /// Asserts that `actual` is within 0.01 of `expected`.
    fn assert_approx(actual: f64, expected: f64, msg: &str) {
        let delta = (actual - expected).abs();
        assert!(delta < 0.01, "{msg}: expected {expected}, got {actual}");
    }

    /// Applies `assert_approx` to each `(actual, expected, msg)` triple in order.
    fn assert_each_approx(checks: &[(f64, f64, &str)]) {
        for &(actual, expected, msg) in checks {
            assert_approx(actual, expected, msg);
        }
    }

    #[test]
    fn simple_horizontal_text_bbox() {
        let ch = char_from_event(
            &default_event(),
            &default_metrics(),
            PAGE_HEIGHT,
            None,
            Some(Color::black()),
        );

        // y spans 717..729 in PDF space, so top = 792 - 729 and bottom = 792 - 717.
        assert_each_approx(&[
            (ch.bbox.x0, 72.0, "x0"),
            (ch.bbox.top, 63.0, "top"),
            (ch.bbox.x1, 80.004, "x1"),
            (ch.bbox.bottom, 75.0, "bottom"),
            (ch.bbox.width(), 8.004, "width"),
            (ch.bbox.height(), 12.0, "height"),
        ]);

        assert_eq!(ch.text, "A");
        assert_eq!(ch.fontname, "Helvetica");
        assert_eq!(ch.size, 12.0);
        assert!(ch.upright);
        assert_eq!(ch.direction, TextDirection::Ltr);
        assert_eq!(ch.char_code, 65);
        assert_eq!(ch.ctm, [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]);
    }

    #[test]
    fn scaled_text_bbox() {
        let ch = extract(&CharEvent {
            font_size: 24.0,
            ..default_event()
        });

        // Doubling the font size doubles both the width and the height.
        assert_each_approx(&[
            (ch.bbox.x0, 72.0, "x0"),
            (ch.bbox.top, 54.0, "top"),
            (ch.bbox.x1, 88.008, "x1"),
            (ch.bbox.bottom, 78.0, "bottom"),
            (ch.bbox.width(), 16.008, "width"),
            (ch.bbox.height(), 24.0, "height"),
        ]);
        assert_eq!(ch.size, 24.0);
    }

    #[test]
    fn text_with_rise_bbox() {
        let ch = extract(&CharEvent {
            rise: 5.0,
            ..default_event()
        });

        // A rise of 5 moves the box 5 units up the page without resizing it.
        assert_each_approx(&[
            (ch.bbox.x0, 72.0, "x0"),
            (ch.bbox.top, 58.0, "top"),
            (ch.bbox.x1, 80.004, "x1"),
            (ch.bbox.bottom, 70.0, "bottom"),
            (ch.bbox.height(), 12.0, "height"),
        ]);
    }

    #[test]
    fn rotated_text_matrix_bbox() {
        let ch = extract(&CharEvent {
            text_matrix: [0.0, 1.0, -1.0, 0.0, 200.0, 400.0],
            ..default_event()
        });

        // With this matrix the glyph's advance runs along y, so the bbox
        // width and height swap roles.
        assert_each_approx(&[
            (ch.bbox.x0, 191.0, "x0"),
            (ch.bbox.top, 383.996, "top"),
            (ch.bbox.x1, 203.0, "x1"),
            (ch.bbox.bottom, 392.0, "bottom"),
            (ch.bbox.width(), 12.0, "width"),
            (ch.bbox.height(), 8.004, "height"),
        ]);

        assert!(!ch.upright);
        assert_eq!(ch.direction, TextDirection::Btt);
    }

    #[test]
    fn ctm_translation_bbox() {
        let ch = extract(&CharEvent {
            ctm: [1.0, 0.0, 0.0, 1.0, 50.0, 50.0],
            ..default_event()
        });

        // The CTM translation shifts the box by (50, 50) in PDF space.
        assert_each_approx(&[
            (ch.bbox.x0, 122.0, "x0"),
            (ch.bbox.top, 13.0, "top"),
            (ch.bbox.x1, 130.004, "x1"),
            (ch.bbox.bottom, 25.0, "bottom"),
        ]);
    }

    #[test]
    fn char_spacing_increases_width() {
        let ch = extract(&CharEvent {
            char_spacing: 2.0,
            ..default_event()
        });

        // 8.004 glyph width plus 2.0 of character spacing.
        assert_each_approx(&[
            (ch.bbox.width(), 10.004, "width with char_spacing"),
            (ch.bbox.height(), 12.0, "height"),
        ]);
    }

    #[test]
    fn word_spacing_applied_for_space() {
        let space = CharEvent {
            char_code: 32,
            unicode: Some(" ".to_string()),
            displacement: 250.0,
            word_spacing: 3.0,
            ..default_event()
        };

        let ch = char_from_event(&space, &space_metrics(), PAGE_HEIGHT, None, None);

        // 250/1000 * 12 = 3.0 glyph width plus 3.0 of word spacing.
        assert_approx(ch.bbox.width(), 6.0, "width with word_spacing");
    }

    #[test]
    fn word_spacing_not_applied_for_non_space() {
        let ch = extract(&CharEvent {
            word_spacing: 3.0,
            ..default_event()
        });

        assert_approx(ch.bbox.width(), 8.004, "width without word_spacing");
    }

    #[test]
    fn upright_for_horizontal_text() {
        assert!(extract(&default_event()).upright);
    }

    #[test]
    fn not_upright_for_rotated_text() {
        let ch = extract(&CharEvent {
            text_matrix: [0.0, 1.0, -1.0, 0.0, 100.0, 500.0],
            ..default_event()
        });
        assert!(!ch.upright);
    }

    #[test]
    fn direction_ltr_for_normal_text() {
        let ch = extract(&default_event());
        assert_eq!(ch.direction, TextDirection::Ltr);
    }

    #[test]
    fn direction_rtl_for_mirrored_text() {
        let ch = extract(&CharEvent {
            text_matrix: [-1.0, 0.0, 0.0, 1.0, 300.0, 720.0],
            ..default_event()
        });
        assert_eq!(ch.direction, TextDirection::Rtl);
    }

    #[test]
    fn direction_ttb_for_downward_text() {
        let ch = extract(&CharEvent {
            text_matrix: [0.0, -1.0, 1.0, 0.0, 100.0, 700.0],
            ..default_event()
        });
        assert_eq!(ch.direction, TextDirection::Ttb);
    }

    #[test]
    fn direction_btt_for_upward_text() {
        let ch = extract(&CharEvent {
            text_matrix: [0.0, 1.0, -1.0, 0.0, 100.0, 100.0],
            ..default_event()
        });
        assert_eq!(ch.direction, TextDirection::Btt);
    }

    #[test]
    fn unicode_from_event() {
        let ch = extract(&CharEvent {
            unicode: Some("B".to_string()),
            char_code: 66,
            ..default_event()
        });
        assert_eq!(ch.text, "B");
    }

    #[test]
    fn unicode_fallback_to_char_code() {
        // Without a unicode mapping the character code itself is used as text.
        let ch = extract(&CharEvent {
            unicode: None,
            char_code: 65,
            ..default_event()
        });
        assert_eq!(ch.text, "A");
    }

    #[test]
    fn unicode_fallback_replacement_for_invalid() {
        // 0xFFFFFFFF is not a valid scalar value, so U+FFFD is substituted.
        let ch = extract(&CharEvent {
            unicode: None,
            char_code: 0xFFFFFFFF,
            ..default_event()
        });
        assert_eq!(ch.text, "\u{FFFD}");
    }

    #[test]
    fn y_flip_converts_to_top_left_origin() {
        let ch = extract(&CharEvent {
            text_matrix: [1.0, 0.0, 0.0, 1.0, 72.0, 100.0],
            ..default_event()
        });

        // A low PDF y (100) must land near the bottom of the flipped page.
        assert_each_approx(&[
            (ch.bbox.top, 683.0, "top near page bottom"),
            (ch.bbox.bottom, 695.0, "bottom near page bottom"),
            (ch.doctop, 683.0, "doctop"),
        ]);
    }

    #[test]
    fn colors_passed_through() {
        let stroking = Some(Color::Rgb(1.0, 0.0, 0.0));
        let non_stroking = Some(Color::Cmyk(0.0, 0.0, 0.0, 1.0));

        let ch = char_from_event(
            &default_event(),
            &default_metrics(),
            PAGE_HEIGHT,
            stroking.clone(),
            non_stroking.clone(),
        );

        assert_eq!(ch.stroking_color, stroking);
        assert_eq!(ch.non_stroking_color, non_stroking);
    }

    #[test]
    fn horizontal_scaling_affects_width() {
        let ch = extract(&CharEvent {
            h_scaling: 0.5,
            ..default_event()
        });

        // Horizontal scaling halves the width and leaves the height alone.
        assert_each_approx(&[
            (ch.bbox.width(), 4.002, "width at 50% h_scaling"),
            (ch.bbox.height(), 12.0, "height at 50% h_scaling"),
        ]);
    }

    #[test]
    fn default_metrics_produce_reasonable_bbox() {
        let fallback = FontMetrics::default_metrics();

        let ch = char_from_event(&default_event(), &fallback, PAGE_HEIGHT, None, None);

        assert_each_approx(&[
            (ch.bbox.height(), 12.0, "height with default metrics"),
            (ch.bbox.width(), 8.004, "width with default metrics"),
        ]);
    }

    #[test]
    fn ctm_scaling_affects_bbox() {
        let ch = extract(&CharEvent {
            text_matrix: [1.0, 0.0, 0.0, 1.0, 36.0, 360.0],
            ctm: [2.0, 0.0, 0.0, 2.0, 0.0, 0.0],
            ..default_event()
        });

        assert_each_approx(&[
            (ch.bbox.width(), 16.008, "width with 2x CTM"),
            (ch.bbox.height(), 24.0, "height with 2x CTM"),
        ]);
    }

    #[test]
    fn zero_font_size_does_not_panic() {
        // The spacing term divides by the font size; a zero size must be guarded.
        let ch = extract(&CharEvent {
            font_size: 0.0,
            ..default_event()
        });
        assert_eq!(ch.size, 0.0);
    }

    #[test]
    fn combined_spacing_for_space() {
        let space = CharEvent {
            char_code: 32,
            unicode: Some(" ".to_string()),
            displacement: 250.0,
            char_spacing: 1.0,
            word_spacing: 2.0,
            ..default_event()
        };

        let ch = char_from_event(&space, &space_metrics(), PAGE_HEIGHT, None, None);

        // 3.0 glyph width + 1.0 character spacing + 2.0 word spacing.
        assert_approx(ch.bbox.width(), 6.0, "width with combined spacing");
    }
}