1use crate::error::Result;
7use crate::parser::content::{ContentOperation, ContentParser};
8use std::collections::VecDeque;
9
/// A piece of text produced by one show-text operation, together with the
/// text state that was current when it was shown.
#[derive(Clone, Debug)]
pub struct TextChunk {
    /// The shown bytes decoded as UTF-8; invalid sequences are replaced
    /// (lossy decoding).
    pub text: String,
    /// Horizontal text position, accumulated from the move-text offsets seen
    /// since the last begin-text operation.
    pub x: f64,
    /// Vertical text position, accumulated the same way as `x`.
    pub y: f64,
    /// Font size set by the most recent set-font operation.
    pub font_size: f64,
    /// Font name set by the most recent set-font operation, or `None` if no
    /// font had been selected yet.
    pub font_name: Option<String>,
}
24
/// Tuning knobs for [`TextStreamer`].
#[derive(Clone, Debug)]
pub struct TextStreamOptions {
    /// Text shown while the current font size is below this value is
    /// discarded.
    pub min_font_size: f64,
    /// Limit on the total byte length of buffered chunk text; chunks are
    /// dropped from the front of the buffer when it is exceeded.
    pub max_buffer_size: usize,
    /// NOTE(review): no code visible in this file reads this flag — confirm
    /// where (or whether) it takes effect.
    pub preserve_formatting: bool,
    /// When set, extracted text is ordered top-to-bottom (descending `y`)
    /// and then left-to-right (ascending `x`) instead of in buffer order.
    pub sort_by_position: bool,
}
37
38impl Default for TextStreamOptions {
39 fn default() -> Self {
40 Self {
41 min_font_size: 0.0,
42 max_buffer_size: 1024 * 1024, preserve_formatting: true,
44 sort_by_position: true,
45 }
46 }
47}
48
49pub struct TextStreamer {
51 options: TextStreamOptions,
52 buffer: VecDeque<TextChunk>,
53 current_font: Option<String>,
54 current_font_size: f64,
55 current_x: f64,
56 current_y: f64,
57}
58
59impl TextStreamer {
60 pub fn new(options: TextStreamOptions) -> Self {
62 Self {
63 options,
64 buffer: VecDeque::new(),
65 current_font: None,
66 current_font_size: 12.0,
67 current_x: 0.0,
68 current_y: 0.0,
69 }
70 }
71
72 pub fn process_chunk(&mut self, data: &[u8]) -> Result<Vec<TextChunk>> {
74 let operations = ContentParser::parse(data)
75 .map_err(|e| crate::error::PdfError::ParseError(e.to_string()))?;
76
77 let mut chunks = Vec::new();
78
79 for op in operations {
80 match op {
81 ContentOperation::SetFont(name, size) => {
82 self.current_font = Some(name);
83 self.current_font_size = size as f64;
84 }
85 ContentOperation::MoveText(x, y) => {
86 self.current_x += x as f64;
87 self.current_y += y as f64;
88 }
89 ContentOperation::ShowText(bytes) => {
90 if self.current_font_size >= self.options.min_font_size {
91 let text = String::from_utf8_lossy(&bytes).to_string();
92 let chunk = TextChunk {
93 text,
94 x: self.current_x,
95 y: self.current_y,
96 font_size: self.current_font_size,
97 font_name: self.current_font.clone(),
98 };
99 chunks.push(chunk);
100 }
101 }
102 ContentOperation::BeginText => {
103 self.current_x = 0.0;
104 self.current_y = 0.0;
105 }
106 _ => {} }
108 }
109
110 for chunk in &chunks {
112 self.buffer.push_back(chunk.clone());
113 }
114
115 self.check_buffer_size();
117
118 Ok(chunks)
119 }
120
121 pub fn get_buffered_chunks(&self) -> Vec<TextChunk> {
123 self.buffer.iter().cloned().collect()
124 }
125
126 pub fn clear_buffer(&mut self) {
128 self.buffer.clear();
129 }
130
131 pub fn extract_text(&self) -> String {
133 let mut chunks = self.get_buffered_chunks();
134
135 if self.options.sort_by_position {
136 chunks.sort_by(|a, b| {
138 b.y.partial_cmp(&a.y)
139 .unwrap_or(std::cmp::Ordering::Equal)
140 .then(a.x.partial_cmp(&b.x).unwrap_or(std::cmp::Ordering::Equal))
141 });
142 }
143
144 chunks
145 .into_iter()
146 .map(|chunk| chunk.text)
147 .collect::<Vec<_>>()
148 .join(" ")
149 }
150
151 fn check_buffer_size(&mut self) {
152 let total_size: usize = self.buffer.iter().map(|chunk| chunk.text.len()).sum();
153
154 while total_size > self.options.max_buffer_size && !self.buffer.is_empty() {
156 self.buffer.pop_front();
157 }
158 }
159}
160
161pub fn stream_text<F>(content_streams: Vec<Vec<u8>>, mut callback: F) -> Result<()>
163where
164 F: FnMut(TextChunk) -> Result<()>,
165{
166 let mut streamer = TextStreamer::new(TextStreamOptions::default());
167
168 for stream in content_streams {
169 let chunks = streamer.process_chunk(&stream)?;
170 for chunk in chunks {
171 callback(chunk)?;
172 }
173 }
174
175 Ok(())
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a chunk at `(x, y)` with font size 12.0 and no font name.
    fn chunk_at(text: &str, x: f64, y: f64) -> TextChunk {
        TextChunk {
            text: text.to_string(),
            x,
            y,
            font_size: 12.0,
            font_name: None,
        }
    }

    /// Builds a streamer configured with the default options.
    fn default_streamer() -> TextStreamer {
        TextStreamer::new(TextStreamOptions::default())
    }

    /// Total number of text bytes currently held in the streamer's buffer.
    fn buffered_bytes(streamer: &TextStreamer) -> usize {
        streamer.buffer.iter().map(|c| c.text.len()).sum()
    }

    #[test]
    fn test_text_chunk() {
        let chunk = TextChunk {
            text: "Hello".to_string(),
            x: 100.0,
            y: 700.0,
            font_size: 12.0,
            font_name: Some("Helvetica".to_string()),
        };

        assert_eq!(chunk.text, "Hello");
        assert_eq!(chunk.x, 100.0);
        assert_eq!(chunk.y, 700.0);
        assert_eq!(chunk.font_size, 12.0);
        assert_eq!(chunk.font_name, Some("Helvetica".to_string()));
    }

    #[test]
    fn test_text_stream_options_default() {
        let options = TextStreamOptions::default();
        assert_eq!(options.min_font_size, 0.0);
        assert_eq!(options.max_buffer_size, 1024 * 1024);
        assert!(options.preserve_formatting);
        assert!(options.sort_by_position);
    }

    #[test]
    fn test_text_streamer_creation() {
        let streamer = default_streamer();

        assert!(streamer.buffer.is_empty());
        assert_eq!(streamer.current_font_size, 12.0);
        assert_eq!(streamer.current_x, 0.0);
        assert_eq!(streamer.current_y, 0.0);
    }

    #[test]
    fn test_process_chunk_text() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 14 Tf 100 700 Td (Hello World) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert!(!chunks.is_empty());
        assert_eq!(chunks[0].text, "Hello World");
        assert_eq!(chunks[0].font_size, 14.0);
    }

    #[test]
    fn test_min_font_size_filter() {
        let options = TextStreamOptions {
            min_font_size: 10.0,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        // Below the threshold: filtered out.
        let content = b"BT /F1 8 Tf 100 700 Td (Small Text) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();
        assert!(chunks.is_empty());

        // Above the threshold: kept.
        let content = b"BT /F1 12 Tf 100 650 Td (Large Text) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].text, "Large Text");
    }

    #[test]
    fn test_extract_text_sorted() {
        let mut streamer = default_streamer();

        streamer.buffer.push_back(chunk_at("Bottom", 100.0, 100.0));
        streamer.buffer.push_back(chunk_at("Top", 100.0, 700.0));
        streamer.buffer.push_back(chunk_at("Middle", 100.0, 400.0));

        let text = streamer.extract_text();
        assert_eq!(text, "Top Middle Bottom");
    }

    #[test]
    fn test_buffer_management() {
        let options = TextStreamOptions {
            max_buffer_size: 10,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        // 5 chunks x 5 bytes = 25 bytes, well over the 10-byte limit.
        for i in 0..5 {
            streamer.buffer.push_back(chunk_at(&format!("Text{i}"), 0.0, 0.0));
        }

        streamer.check_buffer_size();

        assert!(streamer.buffer.len() < 5);
        assert!(buffered_bytes(&streamer) <= 10);
    }

    #[test]
    fn test_stream_text_function() {
        let content1 = b"BT /F1 12 Tf 100 700 Td (Page 1) Tj ET".to_vec();
        let content2 = b"BT /F1 12 Tf 100 650 Td (Page 2) Tj ET".to_vec();
        let streams = vec![content1, content2];

        let mut collected = Vec::new();
        stream_text(streams, |chunk| {
            collected.push(chunk.text);
            Ok(())
        })
        .unwrap();

        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0], "Page 1");
        assert_eq!(collected[1], "Page 2");
    }

    #[test]
    fn test_text_chunk_debug_clone() {
        let chunk = TextChunk {
            text: "Test".to_string(),
            x: 50.0,
            y: 100.0,
            font_size: 10.0,
            font_name: Some("Arial".to_string()),
        };

        let debug_str = format!("{chunk:?}");
        assert!(debug_str.contains("TextChunk"));
        assert!(debug_str.contains("Test"));

        let cloned = chunk.clone();
        assert_eq!(cloned.text, chunk.text);
        assert_eq!(cloned.x, chunk.x);
        assert_eq!(cloned.y, chunk.y);
        assert_eq!(cloned.font_size, chunk.font_size);
        assert_eq!(cloned.font_name, chunk.font_name);
    }

    #[test]
    fn test_text_stream_options_custom() {
        let options = TextStreamOptions {
            min_font_size: 8.0,
            max_buffer_size: 2048,
            preserve_formatting: false,
            sort_by_position: false,
        };

        assert_eq!(options.min_font_size, 8.0);
        assert_eq!(options.max_buffer_size, 2048);
        assert!(!options.preserve_formatting);
        assert!(!options.sort_by_position);
    }

    #[test]
    fn test_text_stream_options_debug_clone() {
        let options = TextStreamOptions::default();

        let debug_str = format!("{options:?}");
        assert!(debug_str.contains("TextStreamOptions"));

        let cloned = options.clone();
        assert_eq!(cloned.min_font_size, options.min_font_size);
        assert_eq!(cloned.max_buffer_size, options.max_buffer_size);
        assert_eq!(cloned.preserve_formatting, options.preserve_formatting);
        assert_eq!(cloned.sort_by_position, options.sort_by_position);
    }

    #[test]
    fn test_text_streamer_process_empty_chunk() {
        let mut streamer = default_streamer();
        let chunks = streamer.process_chunk(b"").unwrap();
        assert!(chunks.is_empty());
    }

    #[test]
    fn test_text_streamer_process_invalid_content() {
        let mut streamer = default_streamer();
        let content = b"Not valid PDF content";

        // Either outcome is acceptable, but a lenient parse must not invent text.
        if let Ok(chunks) = streamer.process_chunk(content) {
            assert!(chunks.is_empty());
        }
    }

    #[test]
    fn test_text_streamer_font_tracking() {
        let mut streamer = default_streamer();

        let content = b"BT /Helvetica-Bold 16 Tf ET";
        let _ = streamer.process_chunk(content).unwrap();

        assert_eq!(streamer.current_font, Some("Helvetica-Bold".to_string()));
        assert_eq!(streamer.current_font_size, 16.0);
    }

    #[test]
    fn test_text_streamer_position_tracking() {
        let mut streamer = default_streamer();

        let content = b"BT 50 100 Td ET";
        let _ = streamer.process_chunk(content).unwrap();

        assert_eq!(streamer.current_x, 50.0);
        assert_eq!(streamer.current_y, 100.0);
    }

    #[test]
    fn test_text_streamer_begin_text_resets_position() {
        let mut streamer = default_streamer();

        streamer.current_x = 100.0;
        streamer.current_y = 200.0;

        let content = b"BT ET";
        let _ = streamer.process_chunk(content).unwrap();

        assert_eq!(streamer.current_x, 0.0);
        assert_eq!(streamer.current_y, 0.0);
    }

    #[test]
    fn test_text_streamer_clear_buffer() {
        let mut streamer = default_streamer();

        streamer.buffer.push_back(chunk_at("Chunk1", 0.0, 0.0));
        streamer.buffer.push_back(chunk_at("Chunk2", 0.0, 0.0));

        assert_eq!(streamer.buffer.len(), 2);

        streamer.clear_buffer();
        assert!(streamer.buffer.is_empty());
    }

    #[test]
    fn test_text_streamer_get_buffered_chunks() {
        let mut streamer = default_streamer();

        streamer.buffer.push_back(TextChunk {
            text: "First".to_string(),
            x: 10.0,
            y: 20.0,
            font_size: 14.0,
            font_name: Some("Times".to_string()),
        });
        streamer.buffer.push_back(TextChunk {
            text: "Second".to_string(),
            x: 30.0,
            y: 40.0,
            font_size: 16.0,
            font_name: Some("Arial".to_string()),
        });

        let chunks = streamer.get_buffered_chunks();
        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].text, "First");
        assert_eq!(chunks[1].text, "Second");
    }

    #[test]
    fn test_extract_text_no_sorting() {
        let options = TextStreamOptions {
            sort_by_position: false,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        streamer.buffer.push_back(chunk_at("First", 200.0, 100.0));
        streamer.buffer.push_back(chunk_at("Second", 100.0, 200.0));

        // Buffer order is preserved when sorting is disabled.
        let text = streamer.extract_text();
        assert_eq!(text, "First Second");
    }

    #[test]
    fn test_extract_text_horizontal_sorting() {
        let mut streamer = default_streamer();

        streamer.buffer.push_back(chunk_at("Right", 300.0, 500.0));
        streamer.buffer.push_back(chunk_at("Left", 100.0, 500.0));
        streamer.buffer.push_back(chunk_at("Middle", 200.0, 500.0));

        let text = streamer.extract_text();
        assert_eq!(text, "Left Middle Right");
    }

    #[test]
    fn test_check_buffer_size_edge_cases() {
        let options = TextStreamOptions {
            max_buffer_size: 20,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        // Exactly at the limit: nothing is evicted.
        streamer.buffer.push_back(chunk_at(&"a".repeat(20), 0.0, 0.0));
        streamer.check_buffer_size();
        assert_eq!(streamer.buffer.len(), 1);

        // One byte over the limit: eviction must kick in.
        streamer.buffer.push_back(chunk_at("b", 0.0, 0.0));
        streamer.check_buffer_size();
        assert!(streamer.buffer.len() <= 1);
    }

    #[test]
    fn test_stream_text_with_error_callback() {
        let content = b"BT /F1 12 Tf 100 700 Td (Test) Tj ET".to_vec();
        let streams = vec![content];

        let result = stream_text(streams, |_chunk| {
            Err(crate::error::PdfError::ParseError("Test error".to_string()))
        });

        assert!(result.is_err());
    }

    #[test]
    fn test_stream_text_empty_streams() {
        let streams: Vec<Vec<u8>> = vec![];

        let mut collected = Vec::new();
        stream_text(streams, |chunk| {
            collected.push(chunk);
            Ok(())
        })
        .unwrap();

        assert!(collected.is_empty());
    }

    #[test]
    fn test_text_chunk_without_font_name() {
        let chunk = TextChunk {
            text: "No Font".to_string(),
            x: 0.0,
            y: 0.0,
            font_size: 12.0,
            font_name: None,
        };

        assert_eq!(chunk.font_name, None);
    }

    #[test]
    fn test_process_chunk_multiple_operations() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 10 Tf 100 700 Td (First) Tj 50 0 Td (Second) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].text, "First");
        assert_eq!(chunks[1].text, "Second");
        assert_eq!(chunks[0].x, 100.0);
        // The second offset is relative to the first position.
        assert_eq!(chunks[1].x, 150.0);
    }

    #[test]
    fn test_buffer_size_calculation() {
        let options = TextStreamOptions {
            max_buffer_size: 100,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        // 10 chunks x 10 bytes = exactly the limit.
        for _ in 0..10 {
            streamer.buffer.push_back(chunk_at("1234567890", 0.0, 0.0));
        }

        streamer.check_buffer_size();
        assert_eq!(streamer.buffer.len(), 10);

        // One more byte pushes the buffer over the limit.
        streamer.buffer.push_back(chunk_at("x", 0.0, 0.0));
        streamer.check_buffer_size();

        assert!(buffered_bytes(&streamer) <= 100);
    }

    #[test]
    fn test_text_chunk_extreme_positions() {
        let chunk = TextChunk {
            text: "Extreme".to_string(),
            x: f64::MAX,
            y: f64::MIN,
            font_size: 0.1,
            font_name: Some("TinyFont".to_string()),
        };

        assert_eq!(chunk.x, f64::MAX);
        assert_eq!(chunk.y, f64::MIN);
        assert_eq!(chunk.font_size, 0.1);
    }

    #[test]
    fn test_text_streamer_accumulated_position() {
        let mut streamer = default_streamer();

        let content = b"BT 10 20 Td 5 10 Td 15 -5 Td ET";
        let _ = streamer.process_chunk(content).unwrap();

        assert_eq!(streamer.current_x, 30.0); // 10 + 5 + 15
        assert_eq!(streamer.current_y, 25.0); // 20 + 10 - 5
    }

    #[test]
    fn test_process_chunk_with_multiple_font_changes() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 10 Tf (Small) Tj /F2 24 Tf (Large) Tj /F3 16 Tf (Medium) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert_eq!(chunks.len(), 3);
        assert_eq!(chunks[0].font_size, 10.0);
        assert_eq!(chunks[1].font_size, 24.0);
        assert_eq!(chunks[2].font_size, 16.0);
    }

    #[test]
    fn test_empty_text_operations() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 12 Tf () Tj ( ) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert_eq!(chunks.len(), 2);
        assert!(chunks[0].text.is_empty());
        assert_eq!(chunks[1].text, " ");
    }

    #[test]
    fn test_text_with_special_characters() {
        let mut streamer = default_streamer();

        // UTF-8 encoded accented characters inside the string operand.
        let content = b"BT /F1 12 Tf (\xC3\xA9\xC3\xA0\xC3\xB1) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert!(!chunks.is_empty());
        assert!(!chunks[0].text.is_empty());
    }

    #[test]
    fn test_sorting_with_equal_positions() {
        let mut streamer = default_streamer();

        for i in 0..3 {
            streamer
                .buffer
                .push_back(chunk_at(&format!("Text{i}"), 100.0, 100.0));
        }

        let text = streamer.extract_text();
        assert!(text.contains("Text0"));
        assert!(text.contains("Text1"));
        assert!(text.contains("Text2"));
        // The sort is stable, so ties keep their buffer order.
        assert_eq!(text, "Text0 Text1 Text2");
    }

    #[test]
    fn test_max_buffer_size_zero() {
        let options = TextStreamOptions {
            max_buffer_size: 0,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        streamer
            .buffer
            .push_back(chunk_at("Should be removed", 0.0, 0.0));

        streamer.check_buffer_size();
        assert!(streamer.buffer.is_empty());
    }

    #[test]
    fn test_font_name_with_spaces() {
        let mut streamer = default_streamer();

        let content = b"BT /Times New Roman 14 Tf ET";
        let result = streamer.process_chunk(content);

        assert!(result.is_err());

        // A failed parse must leave the text state untouched.
        assert_eq!(streamer.current_font, None);
        assert_eq!(streamer.current_font_size, 12.0);
    }

    #[test]
    fn test_stream_text_with_mixed_content() {
        let content1 = b"BT /F1 8 Tf (Small) Tj ET".to_vec();
        let content2 = b"Invalid content".to_vec();
        let content3 = b"BT /F2 16 Tf (Large) Tj ET".to_vec();
        let streams = vec![content1, content2, content3];

        let mut collected = Vec::new();
        let result = stream_text(streams, |chunk| {
            collected.push(chunk.text);
            Ok(())
        });

        // The first stream is valid, so its text is delivered whether or not
        // the invalid stream aborts the run.
        assert_eq!(collected.first().map(String::as_str), Some("Small"));

        // If the invalid stream was tolerated, the remaining text follows and
        // nothing was invented for the invalid stream.
        if result.is_ok() {
            assert_eq!(collected, ["Small", "Large"]);
        }
    }

    #[test]
    fn test_preserve_formatting_option() {
        let options = TextStreamOptions {
            preserve_formatting: false,
            ..TextStreamOptions::default()
        };
        let streamer = TextStreamer::new(options.clone());

        assert!(!streamer.options.preserve_formatting);
        assert_eq!(streamer.options.min_font_size, options.min_font_size);
    }

    #[test]
    fn test_very_large_font_size() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 9999 Tf (Huge) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert!(!chunks.is_empty());
        assert_eq!(chunks[0].font_size, 9999.0);
        assert_eq!(chunks[0].text, "Huge");
    }

    #[test]
    fn test_negative_font_size() {
        let options = TextStreamOptions {
            min_font_size: -10.0,
            ..TextStreamOptions::default()
        };
        let mut streamer = TextStreamer::new(options);

        streamer.current_font_size = -5.0;
        let content = b"BT (Negative) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert!(!chunks.is_empty());
        assert_eq!(chunks[0].font_size, -5.0);
    }

    #[test]
    fn test_text_position_nan_handling() {
        let mut streamer = default_streamer();

        streamer.buffer.push_back(chunk_at("NaN X", f64::NAN, 100.0));
        streamer.buffer.push_back(chunk_at("NaN Y", 100.0, f64::NAN));

        // Sorting must cope with NaN coordinates without panicking.
        let text = streamer.extract_text();
        assert!(text.contains("NaN"));
    }

    #[test]
    fn test_buffer_with_different_font_names() {
        let mut streamer = default_streamer();

        let fonts = ["Arial", "Times", "Courier", "Helvetica"];
        for (i, font) in fonts.iter().enumerate() {
            streamer.buffer.push_back(TextChunk {
                text: format!("Font{i}"),
                x: 0.0,
                y: 0.0,
                font_size: 12.0,
                font_name: Some(font.to_string()),
            });
        }

        let chunks = streamer.get_buffered_chunks();
        assert_eq!(chunks.len(), 4);
        for (i, chunk) in chunks.iter().enumerate() {
            assert_eq!(chunk.font_name, Some(fonts[i].to_string()));
        }
    }

    #[test]
    fn test_process_chunk_error_propagation() {
        let mut streamer = default_streamer();

        // Bytes that are not valid content-stream syntax.
        let content = b"\xFF\xFE\xFD\xFC";
        let result = streamer.process_chunk(content);

        // Whether the parser rejects or tolerates the input, no text may come
        // out of it and nothing may be buffered.
        if let Ok(chunks) = result {
            assert!(chunks.is_empty());
        }
        assert!(streamer.buffer.is_empty());
    }

    #[test]
    fn test_extract_text_empty_buffer() {
        let streamer = default_streamer();
        let text = streamer.extract_text();
        assert!(text.is_empty());
    }

    #[test]
    fn test_extract_text_single_chunk() {
        let mut streamer = default_streamer();

        streamer.buffer.push_back(chunk_at("Single", 0.0, 0.0));

        let text = streamer.extract_text();
        assert_eq!(text, "Single");
    }

    #[test]
    fn test_check_buffer_size_empty() {
        let mut streamer = default_streamer();
        streamer.check_buffer_size();
        assert!(streamer.buffer.is_empty());
    }

    #[test]
    fn test_complex_content_operations() {
        let mut streamer = default_streamer();

        let content = b"BT /F1 12 Tf 0 0 Td (Start) Tj ET q Q BT 50 50 Td (End) Tj ET";
        let chunks = streamer.process_chunk(content).unwrap();

        assert_eq!(chunks.len(), 2);
        assert_eq!(chunks[0].text, "Start");
        assert_eq!(chunks[1].text, "End");
        assert_eq!(chunks[0].x, 0.0);
        assert_eq!(chunks[1].x, 50.0);
    }

    #[test]
    fn test_stream_text_callback_state() {
        let content = b"BT /F1 12 Tf (Test) Tj ET".to_vec();
        let streams = vec![content; 3];

        let mut count = 0;
        stream_text(streams, |_chunk| {
            count += 1;
            Ok(())
        })
        .unwrap();

        assert_eq!(count, 3);
    }
}