1use crate::error::Result;
7use crate::parser::content::{ContentOperation, ContentParser};
8use std::collections::VecDeque;
9
/// A piece of text together with the position and font state it was shown with.
///
/// [`TextStreamer::process_chunk`] creates one `TextChunk` per show-text
/// operation that passes the font-size filter.
#[derive(Debug, Clone, PartialEq)]
pub struct TextChunk {
    /// The shown text; the streamer decodes the raw bytes as UTF-8, lossily.
    pub text: String,
    /// X coordinate of the text position when the text was shown.
    pub x: f64,
    /// Y coordinate of the text position when the text was shown.
    pub y: f64,
    /// Font size that was in effect when the text was shown.
    pub font_size: f64,
    /// Name of the font that was in effect, or `None` if no font had been set.
    pub font_name: Option<String>,
}
24
/// Settings that control how a [`TextStreamer`] filters, buffers and orders text.
#[derive(Debug, Clone, PartialEq)]
pub struct TextStreamOptions {
    /// Text shown with a font size below this value is dropped.
    pub min_font_size: f64,
    /// Upper bound, in bytes of chunk text, on what the streamer keeps buffered.
    pub max_buffer_size: usize,
    /// NOTE(review): not read by any code in this file — confirm whether it is
    /// consumed elsewhere or is reserved for future use.
    pub preserve_formatting: bool,
    /// When `true`, `extract_text` orders chunks by descending `y`, then by
    /// ascending `x`; when `false`, buffer (arrival) order is kept.
    pub sort_by_position: bool,
}
37
38impl Default for TextStreamOptions {
39 fn default() -> Self {
40 Self {
41 min_font_size: 0.0,
42 max_buffer_size: 1024 * 1024, preserve_formatting: true,
44 sort_by_position: true,
45 }
46 }
47}
48
49pub struct TextStreamer {
51 options: TextStreamOptions,
52 buffer: VecDeque<TextChunk>,
53 current_font: Option<String>,
54 current_font_size: f64,
55 current_x: f64,
56 current_y: f64,
57}
58
59impl TextStreamer {
60 pub fn new(options: TextStreamOptions) -> Self {
62 Self {
63 options,
64 buffer: VecDeque::new(),
65 current_font: None,
66 current_font_size: 12.0,
67 current_x: 0.0,
68 current_y: 0.0,
69 }
70 }
71
72 pub fn process_chunk(&mut self, data: &[u8]) -> Result<Vec<TextChunk>> {
74 let operations = ContentParser::parse(data)
75 .map_err(|e| crate::error::PdfError::ParseError(e.to_string()))?;
76
77 let mut chunks = Vec::new();
78
79 for op in operations {
80 match op {
81 ContentOperation::SetFont(name, size) => {
82 self.current_font = Some(name);
83 self.current_font_size = size as f64;
84 }
85 ContentOperation::MoveText(x, y) => {
86 self.current_x += x as f64;
87 self.current_y += y as f64;
88 }
89 ContentOperation::ShowText(bytes) => {
90 if self.current_font_size >= self.options.min_font_size {
91 let text = String::from_utf8_lossy(&bytes).to_string();
92 let chunk = TextChunk {
93 text,
94 x: self.current_x,
95 y: self.current_y,
96 font_size: self.current_font_size,
97 font_name: self.current_font.clone(),
98 };
99 chunks.push(chunk);
100 }
101 }
102 ContentOperation::BeginText => {
103 self.current_x = 0.0;
104 self.current_y = 0.0;
105 }
106 _ => {} }
108 }
109
110 for chunk in &chunks {
112 self.buffer.push_back(chunk.clone());
113 }
114
115 self.check_buffer_size();
117
118 Ok(chunks)
119 }
120
121 pub fn get_buffered_chunks(&self) -> Vec<TextChunk> {
123 self.buffer.iter().cloned().collect()
124 }
125
126 pub fn clear_buffer(&mut self) {
128 self.buffer.clear();
129 }
130
131 pub fn extract_text(&self) -> String {
133 let mut chunks = self.get_buffered_chunks();
134
135 if self.options.sort_by_position {
136 chunks.sort_by(|a, b| b.y.total_cmp(&a.y).then(a.x.total_cmp(&b.x)));
138 }
139
140 chunks
141 .into_iter()
142 .map(|chunk| chunk.text)
143 .collect::<Vec<_>>()
144 .join(" ")
145 }
146
147 fn check_buffer_size(&mut self) {
148 let total_size: usize = self.buffer.iter().map(|chunk| chunk.text.len()).sum();
149
150 while total_size > self.options.max_buffer_size && !self.buffer.is_empty() {
152 self.buffer.pop_front();
153 }
154 }
155}
156
157pub fn stream_text<F>(content_streams: Vec<Vec<u8>>, mut callback: F) -> Result<()>
159where
160 F: FnMut(TextChunk) -> Result<()>,
161{
162 let mut streamer = TextStreamer::new(TextStreamOptions::default());
163
164 for stream in content_streams {
165 let chunks = streamer.process_chunk(&stream)?;
166 for chunk in chunks {
167 callback(chunk)?;
168 }
169 }
170
171 Ok(())
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a chunk at `(x, y)` with a 12.0 font size and no font name — the
    /// shape most buffer and ordering tests need.
    fn plain_chunk(text: &str, x: f64, y: f64) -> TextChunk {
        TextChunk {
            text: text.to_string(),
            x,
            y,
            font_size: 12.0,
            font_name: None,
        }
    }

    /// Creates a streamer with default options.
    fn default_streamer() -> TextStreamer {
        TextStreamer::new(TextStreamOptions::default())
    }

    #[test]
    fn test_text_chunk() {
        let chunk = TextChunk {
            text: "Hello".to_string(),
            x: 100.0,
            y: 700.0,
            font_size: 12.0,
            font_name: Some("Helvetica".to_string()),
        };

        assert_eq!(chunk.text, "Hello");
        assert_eq!(chunk.x, 100.0);
        assert_eq!(chunk.y, 700.0);
        assert_eq!(chunk.font_size, 12.0);
        assert_eq!(chunk.font_name, Some("Helvetica".to_string()));
    }

    #[test]
    fn test_text_stream_options_default() {
        let options = TextStreamOptions::default();
        assert_eq!(options.min_font_size, 0.0);
        assert_eq!(options.max_buffer_size, 1024 * 1024);
        assert!(options.preserve_formatting);
        assert!(options.sort_by_position);
    }

    #[test]
    fn test_text_streamer_creation() {
        let streamer = default_streamer();

        assert!(streamer.buffer.is_empty());
        assert_eq!(streamer.current_font_size, 12.0);
        assert_eq!(streamer.current_x, 0.0);
        assert_eq!(streamer.current_y, 0.0);
    }

    #[test]
    fn test_process_chunk_text() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 14 Tf 100 700 Td (Hello World) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert!(!chunks.is_empty());
        assert_eq!(chunks[0].text, "Hello World");
        assert_eq!(chunks[0].font_size, 14.0);
    }

    #[test]
    fn test_min_font_size_filter() {
        let options = TextStreamOptions {
            min_font_size: 10.0,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        // 8pt text is below the 10pt threshold and must be dropped.
        let content = b"BT /F1 8 Tf 100 700 Td (Small Text) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();
        assert!(chunks.is_empty());

        // 12pt text passes the filter.
        let content = b"BT /F1 12 Tf 100 650 Td (Large Text) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].text, "Large Text");
    }

    #[test]
    fn test_extract_text_sorted() {
        let mut streamer = default_streamer();

        // Inserted out of order; extraction must order by descending y.
        streamer.buffer.push_back(plain_chunk("Bottom", 100.0, 100.0));
        streamer.buffer.push_back(plain_chunk("Top", 100.0, 700.0));
        streamer.buffer.push_back(plain_chunk("Middle", 100.0, 400.0));

        let text = streamer.extract_text();
        assert_eq!(text, "Top Middle Bottom");
    }

    #[test]
    fn test_buffer_management() {
        let options = TextStreamOptions {
            max_buffer_size: 10,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        // 5 chunks of 5 bytes each exceed the 10-byte limit.
        for i in 0..5 {
            streamer
                .buffer
                .push_back(plain_chunk(&format!("Text{i}"), 0.0, 0.0));
        }

        streamer.check_buffer_size();

        assert!(streamer.buffer.len() < 5);
    }

    #[test]
    fn test_stream_text_function() {
        let content1 = b"BT /F1 12 Tf 100 700 Td (Page 1) Tj ET".to_vec();
        let content2 = b"BT /F1 12 Tf 100 650 Td (Page 2) Tj ET".to_vec();
        let streams = vec![content1, content2];

        let mut collected = Vec::new();
        stream_text(streams, |chunk| {
            collected.push(chunk.text);
            Ok(())
        })
        .unwrap();

        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0], "Page 1");
        assert_eq!(collected[1], "Page 2");
    }

    #[test]
    fn test_text_chunk_debug_clone() {
        let chunk = TextChunk {
            text: "Test".to_string(),
            x: 50.0,
            y: 100.0,
            font_size: 10.0,
            font_name: Some("Arial".to_string()),
        };

        let debug_str = format!("{chunk:?}");
        assert!(debug_str.contains("TextChunk"));
        assert!(debug_str.contains("Test"));

        let cloned = chunk.clone();
        assert_eq!(cloned.text, chunk.text);
        assert_eq!(cloned.x, chunk.x);
        assert_eq!(cloned.y, chunk.y);
        assert_eq!(cloned.font_size, chunk.font_size);
        assert_eq!(cloned.font_name, chunk.font_name);
    }

    #[test]
    fn test_text_stream_options_custom() {
        let options = TextStreamOptions {
            min_font_size: 8.0,
            max_buffer_size: 2048,
            preserve_formatting: false,
            sort_by_position: false,
        };

        assert_eq!(options.min_font_size, 8.0);
        assert_eq!(options.max_buffer_size, 2048);
        assert!(!options.preserve_formatting);
        assert!(!options.sort_by_position);
    }

    #[test]
    fn test_text_stream_options_debug_clone() {
        let options = TextStreamOptions::default();

        let debug_str = format!("{options:?}");
        assert!(debug_str.contains("TextStreamOptions"));

        let cloned = options.clone();
        assert_eq!(cloned.min_font_size, options.min_font_size);
        assert_eq!(cloned.max_buffer_size, options.max_buffer_size);
        assert_eq!(cloned.preserve_formatting, options.preserve_formatting);
        assert_eq!(cloned.sort_by_position, options.sort_by_position);
    }

    #[test]
    fn test_text_streamer_process_empty_chunk() {
        let mut streamer = default_streamer();
        let chunks = streamer.process_chunk(b"").unwrap();
        assert!(chunks.is_empty());
    }

    #[test]
    fn test_text_streamer_process_invalid_content() {
        let mut streamer = default_streamer();
        let content = b"Not valid PDF content";

        // The parser may accept or reject this input; if it accepts it, no
        // text may be produced.
        if let Ok(chunks) = streamer.process_chunk(content) {
            assert!(chunks.is_empty());
        }
    }

    #[test]
    fn test_text_streamer_font_tracking() {
        let mut streamer = default_streamer();

        let content = b"BT /Helvetica-Bold 16 Tf ET";
        let _ = streamer.process_chunk(content).unwrap();

        assert_eq!(streamer.current_font, Some("Helvetica-Bold".to_string()));
        assert_eq!(streamer.current_font_size, 16.0);
    }

    #[test]
    fn test_text_streamer_position_tracking() {
        let mut streamer = default_streamer();

        let content = b"BT 50 100 Td ET";
        let _ = streamer.process_chunk(content).unwrap();

        assert_eq!(streamer.current_x, 50.0);
        assert_eq!(streamer.current_y, 100.0);
    }

    #[test]
    fn test_text_streamer_begin_text_resets_position() {
        let mut streamer = default_streamer();

        streamer.current_x = 100.0;
        streamer.current_y = 200.0;

        let content = b"BT ET";
        let _ = streamer.process_chunk(content).unwrap();

        assert_eq!(streamer.current_x, 0.0);
        assert_eq!(streamer.current_y, 0.0);
    }

    #[test]
    fn test_text_streamer_clear_buffer() {
        let mut streamer = default_streamer();

        streamer.buffer.push_back(plain_chunk("Chunk1", 0.0, 0.0));
        streamer.buffer.push_back(plain_chunk("Chunk2", 0.0, 0.0));

        assert_eq!(streamer.buffer.len(), 2);

        streamer.clear_buffer();
        assert!(streamer.buffer.is_empty());
    }

    #[test]
    fn test_text_streamer_get_buffered_chunks() {
        let mut streamer = default_streamer();

        let chunk1 = TextChunk {
            text: "First".to_string(),
            x: 10.0,
            y: 20.0,
            font_size: 14.0,
            font_name: Some("Times".to_string()),
        };
        let chunk2 = TextChunk {
            text: "Second".to_string(),
            x: 30.0,
            y: 40.0,
            font_size: 16.0,
            font_name: Some("Arial".to_string()),
        };

        streamer.buffer.push_back(chunk1);
        streamer.buffer.push_back(chunk2);

        let chunks = streamer.get_buffered_chunks();
        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].text, "First");
        assert_eq!(chunks[1].text, "Second");
    }

    #[test]
    fn test_extract_text_no_sorting() {
        let options = TextStreamOptions {
            sort_by_position: false,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        streamer.buffer.push_back(plain_chunk("First", 200.0, 100.0));
        streamer.buffer.push_back(plain_chunk("Second", 100.0, 200.0));

        // Insertion order is kept when sorting is disabled.
        let text = streamer.extract_text();
        assert_eq!(text, "First Second");
    }

    #[test]
    fn test_extract_text_horizontal_sorting() {
        let mut streamer = default_streamer();

        // Same y for all chunks, so ordering falls back to ascending x.
        streamer.buffer.push_back(plain_chunk("Right", 300.0, 500.0));
        streamer.buffer.push_back(plain_chunk("Left", 100.0, 500.0));
        streamer.buffer.push_back(plain_chunk("Middle", 200.0, 500.0));

        let text = streamer.extract_text();
        assert_eq!(text, "Left Middle Right");
    }

    #[test]
    fn test_check_buffer_size_edge_cases() {
        let options = TextStreamOptions {
            max_buffer_size: 20,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        // Exactly at the limit: nothing is evicted.
        streamer
            .buffer
            .push_back(plain_chunk(&"a".repeat(20), 0.0, 0.0));

        streamer.check_buffer_size();
        assert_eq!(streamer.buffer.len(), 1);

        // One byte over the limit: the oldest chunk has to go.
        streamer.buffer.push_back(plain_chunk("b", 0.0, 0.0));

        streamer.check_buffer_size();
        assert!(streamer.buffer.len() <= 1);
    }

    #[test]
    fn test_stream_text_with_error_callback() {
        let content = b"BT /F1 12 Tf 100 700 Td (Test) Tj ET".to_vec();
        let streams = vec![content];

        let result = stream_text(streams, |_chunk| {
            Err(crate::error::PdfError::ParseError("Test error".to_string()))
        });

        assert!(result.is_err());
    }

    #[test]
    fn test_stream_text_empty_streams() {
        let streams: Vec<Vec<u8>> = vec![];

        let mut collected = Vec::new();
        stream_text(streams, |chunk| {
            collected.push(chunk);
            Ok(())
        })
        .unwrap();

        assert!(collected.is_empty());
    }

    #[test]
    fn test_text_chunk_without_font_name() {
        let chunk = plain_chunk("No Font", 0.0, 0.0);

        assert_eq!(chunk.font_name, None);
    }

    #[test]
    fn test_process_chunk_multiple_operations() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 10 Tf 100 700 Td (First) Tj 50 0 Td (Second) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].text, "First");
        assert_eq!(chunks[1].text, "Second");
        assert_eq!(chunks[0].x, 100.0);
        // The second Td is relative: 100 + 50.
        assert_eq!(chunks[1].x, 150.0);
    }

    #[test]
    fn test_buffer_size_calculation() {
        let options = TextStreamOptions {
            max_buffer_size: 100,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        // 10 chunks of 10 bytes fill the buffer exactly.
        for _ in 0..10 {
            streamer
                .buffer
                .push_back(plain_chunk("1234567890", 0.0, 0.0));
        }

        streamer.check_buffer_size();

        // One more byte pushes it over the limit.
        streamer.buffer.push_back(plain_chunk("x", 0.0, 0.0));

        streamer.check_buffer_size();

        let total_size: usize = streamer.buffer.iter().map(|c| c.text.len()).sum();
        assert!(total_size <= 100);
    }

    #[test]
    fn test_text_chunk_extreme_positions() {
        let chunk = TextChunk {
            text: "Extreme".to_string(),
            x: f64::MAX,
            y: f64::MIN,
            font_size: 0.1,
            font_name: Some("TinyFont".to_string()),
        };

        assert_eq!(chunk.x, f64::MAX);
        assert_eq!(chunk.y, f64::MIN);
        assert_eq!(chunk.font_size, 0.1);
    }

    #[test]
    fn test_text_streamer_accumulated_position() {
        let mut streamer = default_streamer();

        let content = b"BT 10 20 Td 5 10 Td 15 -5 Td ET";
        let _ = streamer.process_chunk(content).unwrap();

        // 10 + 5 + 15 and 20 + 10 - 5.
        assert_eq!(streamer.current_x, 30.0);
        assert_eq!(streamer.current_y, 25.0);
    }

    #[test]
    fn test_process_chunk_with_multiple_font_changes() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 10 Tf (Small) Tj /F2 24 Tf (Large) Tj /F3 16 Tf (Medium) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0].font_size, 10.0);
        assert_eq!(chunks[1].font_size, 24.0);
        assert_eq!(chunks[2].font_size, 16.0);
    }

    #[test]
    fn test_empty_text_operations() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 12 Tf () Tj ( ) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert_eq!(chunks.len(), 2);
        assert!(chunks[0].text.is_empty());
        assert_eq!(chunks[1].text, " ");
    }

    #[test]
    fn test_text_with_special_characters() {
        let mut streamer = default_streamer();

        // UTF-8 encoded accented characters inside the string operand.
        let content = b"BT /F1 12 Tf (\xC3\xA9\xC3\xA0\xC3\xB1) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert!(!chunks.is_empty());
        assert!(!chunks[0].text.is_empty());
    }

    #[test]
    fn test_sorting_with_equal_positions() {
        let mut streamer = default_streamer();

        for i in 0..3 {
            streamer
                .buffer
                .push_back(plain_chunk(&format!("Text{i}"), 100.0, 100.0));
        }

        // No chunk may be lost when all positions compare equal.
        let text = streamer.extract_text();
        assert!(text.contains("Text0"));
        assert!(text.contains("Text1"));
        assert!(text.contains("Text2"));
    }

    #[test]
    fn test_max_buffer_size_zero() {
        let options = TextStreamOptions {
            max_buffer_size: 0,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        streamer
            .buffer
            .push_back(plain_chunk("Should be removed", 0.0, 0.0));

        streamer.check_buffer_size();
        assert!(streamer.buffer.is_empty());
    }

    #[test]
    fn test_font_name_with_spaces() {
        let mut streamer = default_streamer();

        let content = b"BT /Times New Roman 14 Tf ET";
        let result = streamer.process_chunk(content);

        assert!(result.is_err());

        // A failed parse must leave the font state untouched.
        assert_eq!(streamer.current_font, None);
        assert_eq!(streamer.current_font_size, 12.0);
    }

    #[test]
    fn test_stream_text_with_mixed_content() {
        let content1 = b"BT /F1 8 Tf (Small) Tj ET".to_vec();
        let content2 = b"Invalid content".to_vec();
        let content3 = b"BT /F2 16 Tf (Large) Tj ET".to_vec();
        let streams = vec![content1, content2, content3];

        let mut collected = Vec::new();
        let result = stream_text(streams, |chunk| {
            collected.push(chunk.text);
            Ok(())
        });

        match result {
            // Every stream was accepted: the valid streams' text arrives in order.
            Ok(()) => {
                assert_eq!(collected.first().map(String::as_str), Some("Small"));
                assert_eq!(collected.last().map(String::as_str), Some("Large"));
            }
            // The invalid stream was rejected: only text before it was delivered.
            Err(_) => assert_eq!(collected, ["Small"]),
        }
    }

    #[test]
    fn test_preserve_formatting_option() {
        let options = TextStreamOptions {
            preserve_formatting: false,
            ..TextStreamOptions::default()
        };
        let streamer = TextStreamer::new(options.clone());

        assert!(!streamer.options.preserve_formatting);
        assert_eq!(streamer.options.min_font_size, options.min_font_size);
    }

    #[test]
    fn test_very_large_font_size() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 9999 Tf (Huge) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert!(!chunks.is_empty());
        assert_eq!(chunks[0].font_size, 9999.0);
        assert_eq!(chunks[0].text, "Huge");
    }

    #[test]
    fn test_negative_font_size() {
        // A negative threshold lets the negative font size through the filter.
        let options = TextStreamOptions {
            min_font_size: -10.0,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        streamer.current_font_size = -5.0;
        let content = b"BT (Negative) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert!(!chunks.is_empty());
        assert_eq!(chunks[0].font_size, -5.0);
    }

    #[test]
    fn test_text_position_nan_handling() {
        let mut streamer = default_streamer();

        streamer.buffer.push_back(plain_chunk("NaN X", f64::NAN, 100.0));
        streamer.buffer.push_back(plain_chunk("NaN Y", 100.0, f64::NAN));

        // Sorting NaN coordinates must neither panic nor drop text.
        let text = streamer.extract_text();
        assert!(text.contains("NaN"));
    }

    #[test]
    fn test_buffer_with_different_font_names() {
        let mut streamer = default_streamer();

        let fonts = ["Arial", "Times", "Courier", "Helvetica"];
        for (i, font) in fonts.iter().enumerate() {
            streamer.buffer.push_back(TextChunk {
                text: format!("Font{i}"),
                x: 0.0,
                y: 0.0,
                font_size: 12.0,
                font_name: Some((*font).to_string()),
            });
        }

        let chunks = streamer.get_buffered_chunks();
        assert_eq!(chunks.len(), 4);
        for (chunk, font) in chunks.iter().zip(fonts.iter()) {
            assert_eq!(chunk.font_name, Some((*font).to_string()));
        }
    }

    #[test]
    fn test_process_chunk_error_propagation() {
        let mut streamer = default_streamer();

        // Raw binary input may be accepted or rejected, but it must not panic
        // and it contains no show-text operator, so no text can come out.
        let content = b"\xFF\xFE\xFD\xFC";
        if let Ok(chunks) = streamer.process_chunk(content) {
            assert!(chunks.is_empty());
        }
    }

    #[test]
    fn test_extract_text_empty_buffer() {
        let streamer = default_streamer();
        let text = streamer.extract_text();
        assert!(text.is_empty());
    }

    #[test]
    fn test_extract_text_single_chunk() {
        let mut streamer = default_streamer();

        streamer.buffer.push_back(plain_chunk("Single", 0.0, 0.0));

        let text = streamer.extract_text();
        assert_eq!(text, "Single");
    }

    #[test]
    fn test_check_buffer_size_empty() {
        let mut streamer = default_streamer();
        // Trimming an empty buffer is a no-op.
        streamer.check_buffer_size();
        assert!(streamer.buffer.is_empty());
    }

    #[test]
    fn test_complex_content_operations() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 12 Tf 0 0 Td (Start) Tj ET q Q BT 50 50 Td (End) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].text, "Start");
        assert_eq!(chunks[1].text, "End");
        assert_eq!(chunks[0].x, 0.0);
        assert_eq!(chunks[1].x, 50.0);
    }

    #[test]
    fn test_stream_text_callback_state() {
        let content = b"BT /F1 12 Tf (Test) Tj ET".to_vec();
        let streams = vec![content; 3];

        let mut count = 0;
        stream_text(streams, |_chunk| {
            count += 1;
            Ok(())
        })
        .unwrap();

        assert_eq!(count, 3);
    }
}