1use crate::error::Result;
7use std::io::{Read, Seek};
8
/// Description of a single page handed out while streaming a document.
///
/// A page records its zero-based number, its dimensions and where its content
/// is located (byte offset and length). Pages compare equal when all of these
/// fields are equal.
#[derive(Debug, Clone, PartialEq)]
pub struct StreamingPage {
    /// Zero-based page number (the first page is `0`).
    pub(crate) number: u32,
    /// Page width.
    pub(crate) width: f64,
    /// Page height.
    pub(crate) height: f64,
    /// Offset of the page content.
    // Not read by non-test code in this module, hence the lint allowance.
    #[allow(dead_code)]
    pub(crate) content_offset: u64,
    /// Length of the page content.
    // Not read by non-test code in this module, hence the lint allowance.
    #[allow(dead_code)]
    pub(crate) content_length: usize,
}
20
21impl StreamingPage {
22 #[doc(hidden)]
24 pub fn new_for_test(
25 number: u32,
26 width: f64,
27 height: f64,
28 content_offset: u64,
29 content_length: usize,
30 ) -> Self {
31 Self {
32 number,
33 width,
34 height,
35 content_offset,
36 content_length,
37 }
38 }
39
40 pub fn number(&self) -> u32 {
42 self.number
43 }
44
45 pub fn width(&self) -> f64 {
47 self.width
48 }
49
50 pub fn height(&self) -> f64 {
52 self.height
53 }
54
55 pub fn extract_text_streaming(&self) -> Result<String> {
57 Ok(format!("Text from page {}", self.number + 1))
59 }
60
61 pub fn process_content<F>(&self, mut callback: F) -> Result<()>
63 where
64 F: FnMut(&[u8]) -> Result<()>,
65 {
66 let mock_content = format!("BT /F1 12 Tf 100 700 Td (Page {}) Tj ET", self.number + 1);
68 callback(mock_content.as_bytes())?;
69 Ok(())
70 }
71
72 pub fn media_box(&self) -> [f64; 4] {
74 [0.0, 0.0, self.width, self.height]
75 }
76}
77
/// Hands out the pages of a document one at a time.
///
/// The streamer owns the reader it was created with and keeps a cursor
/// (`current_page`) identifying the page that will be produced next.
pub struct PageStreamer<R>
where
    R: Read + Seek,
{
    /// Source the streamer was created with.
    // Stored but not read by the code in this module, hence the lint allowance.
    #[allow(dead_code)]
    reader: R,
    /// Zero-based number of the page the next call will produce.
    current_page: u32,
    /// Number of pages in the document, when known.
    total_pages: Option<u32>,
    /// Scratch buffer.
    // Allocated but not used by the code in this module, hence the lint allowance.
    #[allow(dead_code)]
    buffer: Vec<u8>,
}
87
88impl<R: Read + Seek> PageStreamer<R> {
89 pub fn new(reader: R) -> Self {
91 Self {
92 reader,
93 current_page: 0,
94 total_pages: None,
95 buffer: Vec::with_capacity(4096),
96 }
97 }
98
99 #[allow(clippy::should_implement_trait)]
101 pub fn next(&mut self) -> Result<Option<StreamingPage>> {
102 if self.current_page >= 3 {
104 return Ok(None);
106 }
107
108 let page = StreamingPage {
109 number: self.current_page,
110 width: 595.0,
111 height: 842.0,
112 content_offset: self.current_page as u64 * 1024,
113 content_length: 512,
114 };
115
116 self.current_page += 1;
117 Ok(Some(page))
118 }
119
120 pub fn seek_to_page(&mut self, page_num: u32) -> Result<()> {
122 self.current_page = page_num;
123 Ok(())
125 }
126
127 pub fn total_pages(&self) -> Option<u32> {
129 self.total_pages
130 }
131}
132
133pub struct PageIterator<R: Read + Seek> {
135 streamer: PageStreamer<R>,
136}
137
138impl<R: Read + Seek> PageIterator<R> {
139 pub fn new(reader: R) -> Self {
140 Self {
141 streamer: PageStreamer::new(reader),
142 }
143 }
144}
145
146impl<R: Read + Seek> Iterator for PageIterator<R> {
147 type Item = Result<StreamingPage>;
148
149 fn next(&mut self) -> Option<Self::Item> {
150 match self.streamer.next() {
151 Ok(Some(page)) => Some(Ok(page)),
152 Ok(None) => None,
153 Err(e) => Some(Err(e)),
154 }
155 }
156}
157
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Minimal payload used as reader contents; the streamer never reads it.
    const PDF_HEADER: &[u8] = b"%PDF-1.7\n";

    /// Builds a streamer over [`PDF_HEADER`].
    fn header_streamer() -> PageStreamer<Cursor<&'static [u8]>> {
        PageStreamer::new(Cursor::new(PDF_HEADER))
    }

    /// Builds an iterator over [`PDF_HEADER`].
    fn header_iterator() -> PageIterator<Cursor<&'static [u8]>> {
        PageIterator::new(Cursor::new(PDF_HEADER))
    }

    /// Builds a 595x842 page with the given number and no content location.
    fn blank_page(number: u32) -> StreamingPage {
        StreamingPage {
            number,
            width: 595.0,
            height: 842.0,
            content_offset: 0,
            content_length: 0,
        }
    }

    /// Runs `process_content` and returns every chunk the callback received.
    fn collect_chunks(page: &StreamingPage) -> Vec<Vec<u8>> {
        let mut chunks = Vec::new();
        page.process_content(|chunk| {
            chunks.push(chunk.to_vec());
            Ok(())
        })
        .unwrap();
        chunks
    }

    #[test]
    fn test_streaming_page() {
        let page = StreamingPage::new_for_test(0, 612.0, 792.0, 1024, 2048);

        assert_eq!(page.number(), 0);
        assert_eq!(page.width(), 612.0);
        assert_eq!(page.height(), 792.0);
        assert_eq!(page.media_box(), [0.0, 0.0, 612.0, 792.0]);
    }

    #[test]
    fn test_extract_text_streaming() {
        let text = blank_page(5).extract_text_streaming().unwrap();

        // Page numbers are zero-based; the text shows the one-based number.
        assert!(text.contains("page 6"));
    }

    #[test]
    fn test_process_content() {
        let chunks = collect_chunks(&blank_page(0));

        assert!(!chunks.is_empty());
        let content = String::from_utf8_lossy(&chunks[0]);
        assert!(content.contains("Page 1"));
    }

    #[test]
    fn test_page_streamer() {
        let mut streamer = header_streamer();

        let first = streamer.next().unwrap().expect("first page");
        assert_eq!(first.number(), 0);

        let second = streamer.next().unwrap().expect("second page");
        assert_eq!(second.number(), 1);
    }

    #[test]
    fn test_page_streamer_seek() {
        let mut streamer = header_streamer();
        streamer.seek_to_page(2).unwrap();

        let page = streamer.next().unwrap().expect("page after seek");
        assert_eq!(page.number(), 2);
    }

    #[test]
    fn test_page_iterator() {
        let pages: Vec<StreamingPage> = header_iterator().map(|page| page.unwrap()).collect();

        assert_eq!(pages.len(), 3);
        assert_eq!(pages[0].number(), 0);
        assert_eq!(pages[1].number(), 1);
        assert_eq!(pages[2].number(), 2);
    }

    #[test]
    fn test_page_iterator_for_loop() {
        let mut count = 0;
        for page_result in header_iterator() {
            let page = page_result.unwrap();
            assert_eq!(page.number(), count);
            count += 1;
        }

        assert_eq!(count, 3);
    }

    #[test]
    fn test_streaming_page_debug_clone() {
        let page = StreamingPage {
            number: 1,
            width: 500.0,
            height: 600.0,
            content_offset: 2048,
            content_length: 1024,
        };

        let debug_str = format!("{page:?}");
        assert!(debug_str.contains("StreamingPage"));
        // Match the field itself; a bare "1" would also match "1024".
        assert!(debug_str.contains("number: 1"));

        let cloned = page.clone();
        assert_eq!(cloned.number, page.number);
        assert_eq!(cloned.width, page.width);
        assert_eq!(cloned.height, page.height);
        assert_eq!(cloned.content_offset, page.content_offset);
        assert_eq!(cloned.content_length, page.content_length);
    }

    #[test]
    fn test_streaming_page_new_for_test() {
        let page = StreamingPage::new_for_test(5, 200.0, 300.0, 4096, 512);

        assert_eq!(page.number(), 5);
        assert_eq!(page.width(), 200.0);
        assert_eq!(page.height(), 300.0);
        assert_eq!(page.content_offset, 4096);
        assert_eq!(page.content_length, 512);
    }

    #[test]
    fn test_streaming_page_media_box_various_sizes() {
        let test_cases = [
            (100.0, 100.0, [0.0, 0.0, 100.0, 100.0]),
            (612.0, 792.0, [0.0, 0.0, 612.0, 792.0]),
            (841.89, 1190.55, [0.0, 0.0, 841.89, 1190.55]),
        ];

        for (width, height, expected) in test_cases {
            let page = StreamingPage::new_for_test(0, width, height, 0, 0);
            assert_eq!(page.media_box(), expected);
        }
    }

    #[test]
    fn test_streaming_page_extract_text_different_pages() {
        for page_num in 0..5 {
            let text = blank_page(page_num).extract_text_streaming().unwrap();
            assert!(text.contains(&format!("page {}", page_num + 1)));
        }
    }

    #[test]
    fn test_streaming_page_process_content_callback_error() {
        let page = StreamingPage::new_for_test(0, 595.0, 842.0, 0, 0);

        let result = page.process_content(|_chunk| {
            Err(crate::error::PdfError::ParseError(
                "Callback error".to_string(),
            ))
        });

        assert!(result.is_err());
    }

    #[test]
    fn test_streaming_page_process_content_multiple_calls() {
        let page = StreamingPage::new_for_test(3, 595.0, 842.0, 0, 0);

        let mut call_count = 0;
        page.process_content(|chunk| {
            call_count += 1;
            assert!(!chunk.is_empty());
            let content = String::from_utf8_lossy(chunk);
            assert!(content.contains("Page 4"));
            Ok(())
        })
        .unwrap();

        // The mock content is delivered as a single chunk.
        assert_eq!(call_count, 1);
    }

    #[test]
    fn test_page_streamer_creation() {
        let streamer = PageStreamer::new(Cursor::new(b"test data"));

        assert_eq!(streamer.current_page, 0);
        assert_eq!(streamer.total_pages, None);
        // `Vec::with_capacity` only guarantees *at least* the requested capacity.
        assert!(streamer.buffer.capacity() >= 4096);
    }

    #[test]
    fn test_page_streamer_total_pages() {
        assert_eq!(header_streamer().total_pages(), None);
    }

    #[test]
    fn test_page_streamer_seek_beyond_pages() {
        let mut streamer = header_streamer();
        streamer.seek_to_page(10).unwrap();

        assert!(streamer.next().unwrap().is_none());
    }

    #[test]
    fn test_page_streamer_exhaustion() {
        let mut streamer = header_streamer();

        // Consume every available page.
        for _ in 0..3 {
            assert!(streamer.next().unwrap().is_some());
        }

        // Once exhausted, the streamer keeps reporting the end.
        assert!(streamer.next().unwrap().is_none());
        assert!(streamer.next().unwrap().is_none());
    }

    #[test]
    fn test_page_streamer_page_properties() {
        let mut streamer = header_streamer();

        for expected_page_num in 0..3 {
            let page = streamer.next().unwrap().unwrap();

            assert_eq!(page.number(), expected_page_num);
            assert_eq!(page.width(), 595.0);
            assert_eq!(page.height(), 842.0);
            assert_eq!(page.content_offset, u64::from(expected_page_num) * 1024);
            assert_eq!(page.content_length, 512);
        }
    }

    #[test]
    fn test_page_iterator_creation() {
        let iterator = PageIterator::new(Cursor::new(b"test"));

        assert_eq!(iterator.streamer.current_page, 0);
    }

    #[test]
    fn test_page_iterator_collect() {
        let pages = header_iterator().collect::<Result<Vec<_>>>().unwrap();

        assert_eq!(pages.len(), 3);
        for (i, page) in pages.iter().enumerate() {
            assert_eq!(page.number(), i as u32);
        }
    }

    #[test]
    fn test_page_iterator_take() {
        let first_two: Vec<_> = header_iterator().take(2).collect();
        assert_eq!(first_two.len(), 2);

        let page0 = first_two[0].as_ref().unwrap();
        let page1 = first_two[1].as_ref().unwrap();

        assert_eq!(page0.number(), 0);
        assert_eq!(page1.number(), 1);
    }

    #[test]
    fn test_page_iterator_skip() {
        let last_page: Vec<_> = header_iterator().skip(2).collect();
        assert_eq!(last_page.len(), 1);

        let page = last_page[0].as_ref().unwrap();
        assert_eq!(page.number(), 2);
    }

    #[test]
    fn test_page_iterator_enumerate() {
        let mut seen = 0;
        for (index, page_result) in header_iterator().enumerate() {
            let page = page_result.unwrap();
            assert_eq!(page.number(), index as u32);
            seen += 1;
        }

        // Guard against the loop body never running.
        assert_eq!(seen, 3);
    }

    #[test]
    fn test_page_streamer_seek_to_zero() {
        let mut streamer = header_streamer();

        // Advance past the first two pages, then rewind.
        assert!(streamer.next().unwrap().is_some());
        assert!(streamer.next().unwrap().is_some());
        streamer.seek_to_page(0).unwrap();

        let page = streamer.next().unwrap().unwrap();
        assert_eq!(page.number(), 0);
    }

    #[test]
    fn test_page_streamer_seek_middle() {
        let mut streamer = header_streamer();
        streamer.seek_to_page(1).unwrap();

        let page = streamer.next().unwrap().unwrap();
        assert_eq!(page.number(), 1);

        let page = streamer.next().unwrap().unwrap();
        assert_eq!(page.number(), 2);
    }

    #[test]
    fn test_streaming_page_zero_dimensions() {
        let page = StreamingPage::new_for_test(0, 0.0, 0.0, 0, 0);

        assert_eq!(page.width(), 0.0);
        assert_eq!(page.height(), 0.0);
        assert_eq!(page.media_box(), [0.0, 0.0, 0.0, 0.0]);
    }

    #[test]
    fn test_streaming_page_large_dimensions() {
        let page = StreamingPage::new_for_test(0, 10000.0, 20000.0, 0, 0);

        assert_eq!(page.width(), 10000.0);
        assert_eq!(page.height(), 20000.0);
        assert_eq!(page.media_box(), [0.0, 0.0, 10000.0, 20000.0]);
    }

    #[test]
    fn test_page_iterator_empty_after_exhaustion() {
        let mut iterator = header_iterator();

        // Drain the iterator without giving up ownership of it.
        for _ in iterator.by_ref() {}

        assert!(iterator.next().is_none());
    }

    #[test]
    fn test_streaming_page_content_callback_data() {
        let page = StreamingPage::new_for_test(7, 595.0, 842.0, 0, 0);

        let collected_data = collect_chunks(&page).concat();
        let content = String::from_utf8_lossy(&collected_data);

        for operator in ["BT", "Tf", "Td", "Tj", "ET"] {
            assert!(content.contains(operator), "missing operator {operator}");
        }
        assert!(content.contains("Page 8"));
    }
}