1use crate::geometry::BBox;
2use crate::text::{Char, TextDirection, is_cjk_text};
3
4#[derive(Debug, Clone)]
6pub struct WordOptions {
7 pub x_tolerance: f64,
9 pub y_tolerance: f64,
11 pub keep_blank_chars: bool,
13 pub use_text_flow: bool,
15 pub text_direction: TextDirection,
17}
18
19impl Default for WordOptions {
20 fn default() -> Self {
21 Self {
22 x_tolerance: 3.0,
23 y_tolerance: 3.0,
24 keep_blank_chars: false,
25 use_text_flow: false,
26 text_direction: TextDirection::default(),
27 }
28 }
29}
30
31#[derive(Debug, Clone, PartialEq)]
33#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
34pub struct Word {
35 pub text: String,
37 pub bbox: BBox,
39 pub doctop: f64,
41 pub direction: TextDirection,
43 pub chars: Vec<Char>,
45}
46
/// Stateless namespace for the character-to-word grouping routines.
pub struct WordExtractor;
49
50impl WordExtractor {
51 pub fn extract(chars: &[Char], options: &WordOptions) -> Vec<Word> {
64 if chars.is_empty() {
65 return Vec::new();
66 }
67
68 let mut sorted_chars: Vec<&Char> = chars.iter().collect();
69 if !options.use_text_flow {
70 match options.text_direction {
71 TextDirection::Ttb => {
72 sorted_chars.sort_by(|a, b| {
74 b.bbox
75 .x0
76 .partial_cmp(&a.bbox.x0)
77 .unwrap()
78 .then(a.bbox.top.partial_cmp(&b.bbox.top).unwrap())
79 });
80 }
81 TextDirection::Btt => {
82 sorted_chars.sort_by(|a, b| {
84 b.bbox
85 .x0
86 .partial_cmp(&a.bbox.x0)
87 .unwrap()
88 .then(b.bbox.bottom.partial_cmp(&a.bbox.bottom).unwrap())
89 });
90 }
91 _ => {
92 sorted_chars.sort_by(|a, b| {
94 a.bbox
95 .top
96 .partial_cmp(&b.bbox.top)
97 .unwrap()
98 .then(a.bbox.x0.partial_cmp(&b.bbox.x0).unwrap())
99 });
100 }
101 }
102 }
103
104 let is_vertical = matches!(
105 options.text_direction,
106 TextDirection::Ttb | TextDirection::Btt
107 );
108
109 let mut words = Vec::new();
110 let mut current_chars: Vec<Char> = Vec::new();
111
112 for &ch in &sorted_chars {
113 let is_blank = ch.text.chars().all(|c| c.is_whitespace());
114
115 if is_blank && !options.keep_blank_chars {
117 if !current_chars.is_empty() {
118 words.push(Self::make_word(¤t_chars));
119 current_chars.clear();
120 }
121 continue;
122 }
123
124 if current_chars.is_empty() {
125 current_chars.push(ch.clone());
126 continue;
127 }
128
129 let last = current_chars.last().unwrap();
130
131 let should_split = if is_vertical {
132 Self::should_split_vertical(last, ch, options)
133 } else {
134 Self::should_split_horizontal(last, ch, options)
135 };
136
137 if should_split {
138 words.push(Self::make_word(¤t_chars));
139 current_chars.clear();
140 }
141
142 current_chars.push(ch.clone());
143 }
144
145 if !current_chars.is_empty() {
146 words.push(Self::make_word(¤t_chars));
147 }
148
149 words
150 }
151
152 fn effective_x_tolerance(last: &Char, current: &Char, base: f64) -> f64 {
157 if is_cjk_text(&last.text) || is_cjk_text(¤t.text) {
158 last.bbox.width().max(base)
159 } else {
160 base
161 }
162 }
163
164 fn effective_y_tolerance(last: &Char, current: &Char, base: f64) -> f64 {
166 if is_cjk_text(&last.text) || is_cjk_text(¤t.text) {
167 last.bbox.height().max(base)
168 } else {
169 base
170 }
171 }
172
173 fn should_split_horizontal(last: &Char, current: &Char, options: &WordOptions) -> bool {
175 let x_gap = current.bbox.x0 - last.bbox.x1;
176 let y_diff = (current.bbox.top - last.bbox.top).abs();
177 let x_tol = Self::effective_x_tolerance(last, current, options.x_tolerance);
178 x_gap > x_tol || y_diff > options.y_tolerance
179 }
180
181 fn should_split_vertical(last: &Char, current: &Char, options: &WordOptions) -> bool {
183 let y_gap = current.bbox.top - last.bbox.bottom;
184 let x_diff = (current.bbox.x0 - last.bbox.x0).abs();
185 let y_tol = Self::effective_y_tolerance(last, current, options.y_tolerance);
186 y_gap > y_tol || x_diff > options.x_tolerance
187 }
188
189 fn make_word(chars: &[Char]) -> Word {
190 let text: String = chars.iter().map(|c| c.text.as_str()).collect();
191 let bbox = chars
192 .iter()
193 .map(|c| c.bbox)
194 .reduce(|a, b| a.union(&b))
195 .expect("make_word called with non-empty chars");
196 let doctop = chars.iter().map(|c| c.doctop).fold(f64::INFINITY, f64::min);
197 let direction = chars[0].direction;
198 Word {
199 text,
200 bbox,
201 doctop,
202 direction,
203 chars: chars.to_vec(),
204 }
205 }
206}
207
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an upright, left-to-right test character from explicit box
    /// edges; `doctop` mirrors `top`.
    fn make_char(text: &str, x0: f64, top: f64, x1: f64, bottom: f64) -> Char {
        Char {
            text: String::from(text),
            bbox: BBox::new(x0, top, x1, bottom),
            fontname: String::from("TestFont"),
            size: 12.0,
            doctop: top,
            upright: true,
            direction: TextDirection::Ltr,
            stroking_color: None,
            non_stroking_color: None,
            ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            char_code: 0,
        }
    }

    /// Builds a CJK test character from its top-left corner and size.
    fn make_cjk_char(text: &str, x0: f64, top: f64, width: f64, height: f64) -> Char {
        Char {
            fontname: String::from("SimSun"),
            ..make_char(text, x0, top, x0 + width, top + height)
        }
    }

    /// Runs the extractor with default options.
    fn extract_default(chars: &[Char]) -> Vec<Word> {
        WordExtractor::extract(chars, &WordOptions::default())
    }

    /// Default options switched to top-to-bottom text.
    fn ttb_options() -> WordOptions {
        WordOptions {
            text_direction: TextDirection::Ttb,
            ..WordOptions::default()
        }
    }

    /// Collects the text of every word, in order.
    fn texts(words: &[Word]) -> Vec<&str> {
        words.iter().map(|w| w.text.as_str()).collect()
    }

    #[test]
    fn test_word_has_doctop_and_direction() {
        let words = extract_default(&[
            make_char("A", 10.0, 100.0, 20.0, 112.0),
            make_char("B", 20.0, 100.0, 30.0, 112.0),
        ]);
        assert_eq!(words.len(), 1);
        let word = &words[0];
        assert_eq!(word.doctop, 100.0);
        assert_eq!(word.direction, TextDirection::Ltr);
    }

    #[test]
    fn test_word_doctop_uses_min_char_doctop() {
        let mut x = make_char("X", 10.0, 100.0, 20.0, 112.0);
        let mut y = make_char("Y", 20.0, 100.0, 30.0, 112.0);
        x.doctop = 900.0;
        y.doctop = 892.0;
        let words = extract_default(&[x, y]);
        assert_eq!(words[0].doctop, 892.0);
    }

    #[test]
    fn test_default_options() {
        let WordOptions {
            x_tolerance,
            y_tolerance,
            keep_blank_chars,
            use_text_flow,
            ..
        } = WordOptions::default();
        assert_eq!(x_tolerance, 3.0);
        assert_eq!(y_tolerance, 3.0);
        assert!(!keep_blank_chars);
        assert!(!use_text_flow);
    }

    #[test]
    fn test_empty_chars() {
        assert!(extract_default(&[]).is_empty());
    }

    #[test]
    fn test_single_char() {
        let words = extract_default(&[make_char("A", 10.0, 100.0, 20.0, 112.0)]);
        assert_eq!(texts(&words), ["A"]);
        assert_eq!(words[0].chars.len(), 1);
    }

    #[test]
    fn test_simple_horizontal_text() {
        // Five touching characters on one line.
        let words = extract_default(&[
            make_char("H", 10.0, 100.0, 20.0, 112.0),
            make_char("e", 20.0, 100.0, 30.0, 112.0),
            make_char("l", 30.0, 100.0, 35.0, 112.0),
            make_char("l", 35.0, 100.0, 40.0, 112.0),
            make_char("o", 40.0, 100.0, 50.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["Hello"]);
        assert_eq!(words[0].bbox, BBox::new(10.0, 100.0, 50.0, 112.0));
        assert_eq!(words[0].chars.len(), 5);
    }

    #[test]
    fn test_multi_line_text() {
        // Two lines, 20 units apart.
        let words = extract_default(&[
            make_char("H", 10.0, 100.0, 20.0, 112.0),
            make_char("i", 20.0, 100.0, 30.0, 112.0),
            make_char("L", 10.0, 120.0, 20.0, 132.0),
            make_char("o", 20.0, 120.0, 30.0, 132.0),
        ]);
        assert_eq!(texts(&words), ["Hi", "Lo"]);
    }

    #[test]
    fn test_text_with_large_gap() {
        // "B" ends at 30 and "C" starts at 50: a gap of 20.
        let words = extract_default(&[
            make_char("A", 10.0, 100.0, 20.0, 112.0),
            make_char("B", 20.0, 100.0, 30.0, 112.0),
            make_char("C", 50.0, 100.0, 60.0, 112.0),
            make_char("D", 60.0, 100.0, 70.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["AB", "CD"]);
    }

    #[test]
    fn test_text_with_small_gap_within_tolerance() {
        // A gap of 2 stays under the default tolerance of 3.
        let words = extract_default(&[
            make_char("A", 10.0, 100.0, 20.0, 112.0),
            make_char("B", 22.0, 100.0, 32.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["AB"]);
    }

    #[test]
    fn test_split_on_space_char() {
        let words = extract_default(&[
            make_char("A", 10.0, 100.0, 20.0, 112.0),
            make_char(" ", 20.0, 100.0, 25.0, 112.0),
            make_char("B", 25.0, 100.0, 35.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["A", "B"]);
    }

    #[test]
    fn test_keep_blank_chars_true() {
        let chars = [
            make_char("A", 10.0, 100.0, 20.0, 112.0),
            make_char(" ", 20.0, 100.0, 25.0, 112.0),
            make_char("B", 25.0, 100.0, 35.0, 112.0),
        ];
        let opts = WordOptions {
            keep_blank_chars: true,
            ..WordOptions::default()
        };
        let words = WordExtractor::extract(&chars, &opts);
        assert_eq!(texts(&words), ["A B"]);
    }

    #[test]
    fn test_configurable_x_tolerance() {
        // The characters are 10 units apart.
        let chars = [
            make_char("A", 10.0, 100.0, 20.0, 112.0),
            make_char("B", 30.0, 100.0, 40.0, 112.0),
        ];

        // The default tolerance of 3 splits them.
        assert_eq!(extract_default(&chars).len(), 2);

        // A tolerance of 15 joins them.
        let opts = WordOptions {
            x_tolerance: 15.0,
            ..WordOptions::default()
        };
        let words = WordExtractor::extract(&chars, &opts);
        assert_eq!(texts(&words), ["AB"]);
    }

    #[test]
    fn test_configurable_y_tolerance() {
        // The tops are 5 units apart.
        let chars = [
            make_char("A", 10.0, 100.0, 20.0, 112.0),
            make_char("B", 20.0, 105.0, 30.0, 117.0),
        ];

        // The default tolerance of 3 splits them.
        assert_eq!(extract_default(&chars).len(), 2);

        // A tolerance of 10 joins them.
        let opts = WordOptions {
            y_tolerance: 10.0,
            ..WordOptions::default()
        };
        let words = WordExtractor::extract(&chars, &opts);
        assert_eq!(texts(&words), ["AB"]);
    }

    #[test]
    fn test_word_bbox_is_union_of_char_bboxes() {
        // Differing heights: the word box must span the extremes.
        let words = extract_default(&[
            make_char("A", 10.0, 98.0, 20.0, 112.0),
            make_char("b", 20.0, 100.0, 28.0, 110.0),
            make_char("C", 28.0, 97.0, 38.0, 113.0),
        ]);
        assert_eq!(words.len(), 1);
        assert_eq!(words[0].bbox, BBox::new(10.0, 97.0, 38.0, 113.0));
    }

    #[test]
    fn test_unsorted_chars_are_sorted_spatially() {
        // Supplied right-to-left; spatial sorting restores reading order.
        let words = extract_default(&[
            make_char("B", 20.0, 100.0, 30.0, 112.0),
            make_char("A", 10.0, 100.0, 20.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["AB"]);
    }

    #[test]
    fn test_use_text_flow_preserves_order() {
        let chars = [
            make_char("B", 20.0, 100.0, 30.0, 112.0),
            make_char("A", 10.0, 100.0, 20.0, 112.0),
        ];
        let opts = WordOptions {
            use_text_flow: true,
            ..WordOptions::default()
        };
        let words = WordExtractor::extract(&chars, &opts);
        // Input order is kept, so the text reads "BA".
        assert_eq!(texts(&words), ["BA"]);
    }

    #[test]
    fn test_multiple_spaces_between_words() {
        let words = extract_default(&[
            make_char("A", 10.0, 100.0, 20.0, 112.0),
            make_char(" ", 20.0, 100.0, 25.0, 112.0),
            make_char(" ", 25.0, 100.0, 30.0, 112.0),
            make_char("B", 30.0, 100.0, 40.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["A", "B"]);
    }

    #[test]
    fn test_leading_spaces_ignored() {
        let words = extract_default(&[
            make_char(" ", 5.0, 100.0, 10.0, 112.0),
            make_char("A", 10.0, 100.0, 20.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["A"]);
    }

    #[test]
    fn test_trailing_spaces_ignored() {
        let words = extract_default(&[
            make_char("A", 10.0, 100.0, 20.0, 112.0),
            make_char(" ", 20.0, 100.0, 25.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["A"]);
    }

    #[test]
    fn test_overlapping_chars_grouped() {
        // "i" starts 2 units before "f" ends.
        let words = extract_default(&[
            make_char("f", 10.0, 100.0, 20.0, 112.0),
            make_char("i", 18.0, 100.0, 25.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["fi"]);
    }

    #[test]
    fn test_three_words_on_one_line() {
        let words = extract_default(&[
            make_char("T", 10.0, 100.0, 20.0, 112.0),
            make_char("h", 20.0, 100.0, 28.0, 112.0),
            make_char("e", 28.0, 100.0, 36.0, 112.0),
            make_char(" ", 36.0, 100.0, 40.0, 112.0),
            make_char("q", 40.0, 100.0, 48.0, 112.0),
            make_char("u", 48.0, 100.0, 56.0, 112.0),
            make_char("i", 56.0, 100.0, 60.0, 112.0),
            make_char("c", 60.0, 100.0, 68.0, 112.0),
            make_char("k", 68.0, 100.0, 76.0, 112.0),
            make_char(" ", 76.0, 100.0, 80.0, 112.0),
            make_char("f", 80.0, 100.0, 88.0, 112.0),
            make_char("o", 88.0, 100.0, 96.0, 112.0),
            make_char("x", 96.0, 100.0, 104.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["The", "quick", "fox"]);
    }

    #[test]
    fn test_multiline_sorting() {
        // Lines are interleaved in the input.
        let words = extract_default(&[
            make_char("C", 10.0, 120.0, 20.0, 132.0),
            make_char("A", 10.0, 100.0, 20.0, 112.0),
            make_char("D", 20.0, 120.0, 30.0, 132.0),
            make_char("B", 20.0, 100.0, 30.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["AB", "CD"]);
    }

    #[test]
    fn test_chinese_text_grouping() {
        // 12-wide glyphs with a 1-unit gap between them.
        let words = extract_default(&[
            make_cjk_char("中", 10.0, 100.0, 12.0, 12.0),
            make_cjk_char("国", 23.0, 100.0, 12.0, 12.0),
            make_cjk_char("人", 36.0, 100.0, 12.0, 12.0),
        ]);
        assert_eq!(texts(&words), ["中国人"]);
    }

    #[test]
    fn test_chinese_text_with_larger_gap_uses_char_width_tolerance() {
        // A gap of 8 exceeds the base tolerance but not the glyph width of 12.
        let words = extract_default(&[
            make_cjk_char("中", 10.0, 100.0, 12.0, 12.0),
            make_cjk_char("国", 30.0, 100.0, 12.0, 12.0),
        ]);
        assert_eq!(
            words.len(),
            1,
            "CJK chars within char-width tolerance should group"
        );
        assert_eq!(words[0].text, "中国");
    }

    #[test]
    fn test_chinese_text_large_gap_splits() {
        // A gap of 15 exceeds the glyph width of 12.
        let words = extract_default(&[
            make_cjk_char("中", 10.0, 100.0, 12.0, 12.0),
            make_cjk_char("国", 37.0, 100.0, 12.0, 12.0),
        ]);
        assert_eq!(
            words.len(),
            2,
            "CJK chars beyond char-width tolerance should split"
        );
        assert_eq!(words[0].text, "中");
        assert_eq!(words[1].text, "国");
    }

    #[test]
    fn test_japanese_mixed_text() {
        // CJK glyphs followed directly by Latin letters, all 0-1 units apart.
        let words = extract_default(&[
            make_cjk_char("日", 10.0, 100.0, 12.0, 12.0),
            make_cjk_char("本", 23.0, 100.0, 12.0, 12.0),
            make_cjk_char("語", 36.0, 100.0, 12.0, 12.0),
            make_char("a", 49.0, 100.0, 55.0, 112.0),
            make_char("b", 55.0, 100.0, 61.0, 112.0),
            make_char("c", 61.0, 100.0, 67.0, 112.0),
        ]);
        assert_eq!(texts(&words), ["日本語abc"]);
    }

    #[test]
    fn test_korean_text_grouping() {
        let words = extract_default(&[
            make_cjk_char("한", 10.0, 100.0, 12.0, 12.0),
            make_cjk_char("글", 23.0, 100.0, 12.0, 12.0),
        ]);
        assert_eq!(texts(&words), ["한글"]);
    }

    #[test]
    fn test_mixed_cjk_latin_with_gap() {
        // "o" ends at 38 and "中" starts at 58: a gap of 20.
        let words = extract_default(&[
            make_char("H", 10.0, 100.0, 18.0, 112.0),
            make_char("e", 18.0, 100.0, 24.0, 112.0),
            make_char("l", 24.0, 100.0, 28.0, 112.0),
            make_char("l", 28.0, 100.0, 32.0, 112.0),
            make_char("o", 32.0, 100.0, 38.0, 112.0),
            make_cjk_char("中", 58.0, 100.0, 12.0, 12.0),
            make_cjk_char("国", 71.0, 100.0, 12.0, 12.0),
        ]);
        assert_eq!(texts(&words), ["Hello", "中国"]);
    }

    #[test]
    fn test_cjk_transition_to_latin_uses_cjk_tolerance() {
        // A gap of 5 exceeds the base tolerance but not the glyph width of 12.
        let words = extract_default(&[
            make_cjk_char("中", 10.0, 100.0, 12.0, 12.0),
            make_char("A", 27.0, 100.0, 33.0, 112.0),
        ]);
        assert_eq!(
            words.len(),
            1,
            "CJK-to-Latin transition should use CJK tolerance"
        );
        assert_eq!(words[0].text, "中A");
    }

    #[test]
    fn test_vertical_text_chinese() {
        // One column, glyphs stacked with a 1-unit gap.
        let chars = [
            make_cjk_char("中", 100.0, 10.0, 12.0, 12.0),
            make_cjk_char("国", 100.0, 23.0, 12.0, 12.0),
            make_cjk_char("人", 100.0, 36.0, 12.0, 12.0),
        ];
        let words = WordExtractor::extract(&chars, &ttb_options());
        assert_eq!(texts(&words), ["中国人"]);
    }

    #[test]
    fn test_vertical_text_two_columns() {
        // The right column (x = 100) is read before the left one (x = 70).
        let chars = [
            make_cjk_char("一", 100.0, 10.0, 12.0, 12.0),
            make_cjk_char("二", 100.0, 23.0, 12.0, 12.0),
            make_cjk_char("三", 70.0, 10.0, 12.0, 12.0),
            make_cjk_char("四", 70.0, 23.0, 12.0, 12.0),
        ];
        let words = WordExtractor::extract(&chars, &ttb_options());
        assert_eq!(texts(&words), ["一二", "三四"]);
    }

    #[test]
    fn test_vertical_text_with_gap() {
        // "上" ends at 22 and "下" starts at 40: a gap of 18 exceeds the
        // glyph height of 12.
        let chars = [
            make_cjk_char("上", 100.0, 10.0, 12.0, 12.0),
            make_cjk_char("下", 100.0, 40.0, 12.0, 12.0),
        ];
        let words = WordExtractor::extract(&chars, &ttb_options());
        assert_eq!(
            words.len(),
            2,
            "Vertical CJK chars with large gap should split"
        );
        assert_eq!(words[0].text, "上");
        assert_eq!(words[1].text, "下");
    }

    #[test]
    fn test_cjk_with_space_splits() {
        // A 3-wide space in the CJK font, carrying the space char code.
        let space = Char {
            char_code: 32,
            ..make_cjk_char(" ", 22.0, 100.0, 3.0, 12.0)
        };
        let words = extract_default(&[
            make_cjk_char("中", 10.0, 100.0, 12.0, 12.0),
            space,
            make_cjk_char("国", 25.0, 100.0, 12.0, 12.0),
        ]);
        assert_eq!(texts(&words), ["中", "国"]);
    }
}