1use pdfplumber_core::{BBox, ExtractOptions, PdfError};
7
8use crate::handler::ContentHandler;
9
10pub trait PdfBackend {
32 type Document;
34
35 type Page;
37
38 type Error: std::error::Error + Into<PdfError>;
40
41 fn open(bytes: &[u8]) -> Result<Self::Document, Self::Error>;
47
48 fn page_count(doc: &Self::Document) -> usize;
50
51 fn get_page(doc: &Self::Document, index: usize) -> Result<Self::Page, Self::Error>;
57
58 fn page_media_box(doc: &Self::Document, page: &Self::Page) -> Result<BBox, Self::Error>;
69
70 fn page_crop_box(doc: &Self::Document, page: &Self::Page) -> Result<Option<BBox>, Self::Error>;
79
80 fn page_rotate(doc: &Self::Document, page: &Self::Page) -> Result<i32, Self::Error>;
88
89 fn interpret_page(
101 doc: &Self::Document,
102 page: &Self::Page,
103 handler: &mut dyn ContentHandler,
104 options: &ExtractOptions,
105 ) -> Result<(), Self::Error>;
106}
107
108#[cfg(test)]
109mod tests {
110 use super::*;
111 use crate::handler::{CharEvent, ImageEvent, PaintOp, PathEvent};
112 use pdfplumber_core::{Color, PathSegment, Point};
113
114 #[derive(Debug)]
117 struct MockDocument {
118 pages: Vec<MockPageData>,
119 }
120
121 #[derive(Debug)]
122 struct MockPageData {
123 media_box: BBox,
124 crop_box: Option<BBox>,
125 rotate: i32,
126 }
127
/// Page handle used by the mock backend; it only remembers which page it
/// refers to.
#[derive(Debug)]
struct MockPage {
    /// Position of the page inside the owning document's page list.
    index: usize,
}
132
133 struct CollectingHandler {
136 chars: Vec<CharEvent>,
137 paths: Vec<PathEvent>,
138 images: Vec<ImageEvent>,
139 }
140
141 impl CollectingHandler {
142 fn new() -> Self {
143 Self {
144 chars: Vec::new(),
145 paths: Vec::new(),
146 images: Vec::new(),
147 }
148 }
149 }
150
151 impl ContentHandler for CollectingHandler {
152 fn on_char(&mut self, event: CharEvent) {
153 self.chars.push(event);
154 }
155
156 fn on_path_painted(&mut self, event: PathEvent) {
157 self.paths.push(event);
158 }
159
160 fn on_image(&mut self, event: ImageEvent) {
161 self.images.push(event);
162 }
163 }
164
/// Field-less marker type; its `PdfBackend` implementation serves canned data
/// for the tests in this module.
struct MockBackend;
168
169 impl PdfBackend for MockBackend {
170 type Document = MockDocument;
171 type Page = MockPage;
172 type Error = PdfError;
173
174 fn open(bytes: &[u8]) -> Result<Self::Document, Self::Error> {
175 if bytes.is_empty() {
176 return Err(PdfError::ParseError("empty input".to_string()));
177 }
178 let page_count = bytes[0] as usize;
180 let mut pages = Vec::new();
181 for _ in 0..page_count {
182 pages.push(MockPageData {
183 media_box: BBox::new(0.0, 0.0, 612.0, 792.0), crop_box: None,
185 rotate: 0,
186 });
187 }
188 Ok(MockDocument { pages })
189 }
190
191 fn page_count(doc: &Self::Document) -> usize {
192 doc.pages.len()
193 }
194
195 fn get_page(doc: &Self::Document, index: usize) -> Result<Self::Page, Self::Error> {
196 if index >= doc.pages.len() {
197 return Err(PdfError::ParseError(format!(
198 "page index {index} out of range (0..{})",
199 doc.pages.len()
200 )));
201 }
202 Ok(MockPage { index })
203 }
204
205 fn page_media_box(doc: &Self::Document, page: &Self::Page) -> Result<BBox, Self::Error> {
206 Ok(doc.pages[page.index].media_box)
207 }
208
209 fn page_crop_box(
210 doc: &Self::Document,
211 page: &Self::Page,
212 ) -> Result<Option<BBox>, Self::Error> {
213 Ok(doc.pages[page.index].crop_box)
214 }
215
216 fn page_rotate(doc: &Self::Document, page: &Self::Page) -> Result<i32, Self::Error> {
217 Ok(doc.pages[page.index].rotate)
218 }
219
220 fn interpret_page(
221 _doc: &Self::Document,
222 _page: &Self::Page,
223 handler: &mut dyn ContentHandler,
224 _options: &ExtractOptions,
225 ) -> Result<(), Self::Error> {
226 handler.on_char(CharEvent {
228 char_code: 72, unicode: Some("H".to_string()),
230 font_name: "Times-Roman".to_string(),
231 font_size: 14.0,
232 text_matrix: [1.0, 0.0, 0.0, 1.0, 72.0, 720.0],
233 ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
234 displacement: 722.0,
235 char_spacing: 0.0,
236 word_spacing: 0.0,
237 h_scaling: 1.0,
238 rise: 0.0,
239 });
240
241 handler.on_path_painted(PathEvent {
243 segments: vec![
244 PathSegment::MoveTo(Point::new(72.0, 700.0)),
245 PathSegment::LineTo(Point::new(540.0, 700.0)),
246 ],
247 paint_op: PaintOp::Stroke,
248 line_width: 0.5,
249 stroking_color: Some(Color::black()),
250 non_stroking_color: None,
251 ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
252 dash_pattern: None,
253 fill_rule: None,
254 });
255
256 handler.on_image(ImageEvent {
258 name: "Im1".to_string(),
259 ctm: [100.0, 0.0, 0.0, 75.0, 72.0, 600.0],
260 width: 400,
261 height: 300,
262 colorspace: Some("DeviceRGB".to_string()),
263 bits_per_component: Some(8),
264 });
265
266 Ok(())
267 }
268 }
269
/// A buffer whose first byte is 3 opens as a three-page document.
#[test]
fn mock_backend_open_valid_document() {
    let three_pages = MockBackend::open(&[3]).unwrap();
    let count = MockBackend::page_count(&three_pages);
    assert_eq!(count, 3);
}
277
/// A buffer whose first byte is 1 opens as a single-page document.
#[test]
fn mock_backend_open_single_page() {
    let single = MockBackend::open(&[1]).unwrap();
    let count = MockBackend::page_count(&single);
    assert_eq!(count, 1);
}
283
/// Opening an empty buffer is reported as an error.
#[test]
fn mock_backend_open_empty_bytes_fails() {
    assert!(MockBackend::open(&[]).is_err());
}
289
/// Both the first and the last page of a three-page document can be fetched,
/// and each handle carries the requested index.
#[test]
fn mock_backend_get_page_valid_index() {
    let doc = MockBackend::open(&[3]).unwrap();

    for &wanted in &[0usize, 2] {
        let handle = MockBackend::get_page(&doc, wanted).unwrap();
        assert_eq!(handle.index, wanted);
    }
}
301
/// Asking a two-page document for page 5 is reported as an error.
#[test]
fn mock_backend_get_page_out_of_bounds() {
    let two_pages = MockBackend::open(&[2]).unwrap();
    assert!(MockBackend::get_page(&two_pages, 5).is_err());
}
308
/// Pages created by `open` report the default 612 x 792 media box.
#[test]
fn mock_backend_page_media_box() {
    let doc = MockBackend::open(&[1]).unwrap();
    let first_page = MockBackend::get_page(&doc, 0).unwrap();

    let expected = BBox::new(0.0, 0.0, 612.0, 792.0);
    let actual = MockBackend::page_media_box(&doc, &first_page).unwrap();
    assert_eq!(actual, expected);
}
318
/// Pages created by `open` have no crop box.
#[test]
fn mock_backend_page_crop_box_none() {
    let doc = MockBackend::open(&[1]).unwrap();
    let first_page = MockBackend::get_page(&doc, 0).unwrap();

    let reported = MockBackend::page_crop_box(&doc, &first_page).unwrap();
    assert!(reported.is_none());
}
328
/// Pages created by `open` report a rotation of 0.
#[test]
fn mock_backend_page_rotate_default() {
    let doc = MockBackend::open(&[1]).unwrap();
    let first_page = MockBackend::get_page(&doc, 0).unwrap();

    let reported = MockBackend::page_rotate(&doc, &first_page).unwrap();
    assert_eq!(reported, 0);
}
338
/// Interpreting a page delivers exactly one character event carrying the
/// mock's fixed glyph data.
#[test]
fn mock_backend_interpret_page_emits_char() {
    let doc = MockBackend::open(&[1]).unwrap();
    let page = MockBackend::get_page(&doc, 0).unwrap();
    let mut collector = CollectingHandler::new();
    let options = ExtractOptions::default();

    MockBackend::interpret_page(&doc, &page, &mut collector, &options).unwrap();

    let emitted = &collector.chars;
    assert_eq!(emitted.len(), 1);

    let glyph = &emitted[0];
    assert_eq!(glyph.char_code, 72);
    assert_eq!(glyph.unicode.as_deref(), Some("H"));
    assert_eq!(glyph.font_name, "Times-Roman");
    assert_eq!(glyph.font_size, 14.0);
}
356
/// Interpreting a page delivers exactly one stroked path with two segments.
#[test]
fn mock_backend_interpret_page_emits_path() {
    let doc = MockBackend::open(&[1]).unwrap();
    let page = MockBackend::get_page(&doc, 0).unwrap();
    let mut collector = CollectingHandler::new();
    let options = ExtractOptions::default();

    MockBackend::interpret_page(&doc, &page, &mut collector, &options).unwrap();

    let emitted = &collector.paths;
    assert_eq!(emitted.len(), 1);

    let line = &emitted[0];
    assert_eq!(line.paint_op, PaintOp::Stroke);
    assert_eq!(line.segments.len(), 2);
    assert_eq!(line.line_width, 0.5);
}
371
/// Interpreting a page delivers exactly one image event named "Im1" with the
/// mock's fixed dimensions.
#[test]
fn mock_backend_interpret_page_emits_image() {
    let doc = MockBackend::open(&[1]).unwrap();
    let page = MockBackend::get_page(&doc, 0).unwrap();
    let mut collector = CollectingHandler::new();
    let options = ExtractOptions::default();

    MockBackend::interpret_page(&doc, &page, &mut collector, &options).unwrap();

    let emitted = &collector.images;
    assert_eq!(emitted.len(), 1);

    let picture = &emitted[0];
    assert_eq!(picture.name, "Im1");
    assert_eq!(picture.width, 400);
    assert_eq!(picture.height, 300);
}
386
/// The handler can be passed as an explicit `&mut dyn ContentHandler`, and all
/// three event kinds still reach the concrete collector behind it.
#[test]
fn mock_backend_interpret_page_uses_trait_object() {
    let doc = MockBackend::open(&[1]).unwrap();
    let page = MockBackend::get_page(&doc, 0).unwrap();
    let mut collector = CollectingHandler::new();
    let options = ExtractOptions::default();

    {
        // Coerce up front so the call site receives a trait object rather
        // than the concrete type.
        let dyn_handler: &mut dyn ContentHandler = &mut collector;
        MockBackend::interpret_page(&doc, &page, dyn_handler, &options).unwrap();
    }

    assert_eq!(collector.chars.len(), 1);
    assert_eq!(collector.paths.len(), 1);
    assert_eq!(collector.images.len(), 1);
}
402
/// The backend's error type converts into `PdfError` through `Into`, as the
/// `PdfBackend::Error` bound requires.
#[test]
fn mock_backend_error_converts_to_pdf_error() {
    let failure = MockBackend::open(&[]).unwrap_err();
    let converted: PdfError = failure.into();
    assert!(matches!(converted, PdfError::ParseError(_)));
}
413
/// The backend's error can be boxed as `dyn std::error::Error`, and its
/// display text mentions the cause.
#[test]
fn mock_backend_error_is_std_error() {
    let failure = MockBackend::open(&[]).unwrap_err();
    let boxed: Box<dyn std::error::Error> = Box::new(failure);
    let message = boxed.to_string();
    assert!(message.contains("empty input"));
}
421
/// A hand-built document reports each page's own media box, crop box and
/// rotation rather than the defaults produced by `open`.
#[test]
fn mock_backend_custom_page_properties() {
    let doc = MockDocument {
        pages: vec![
            // Page 0: explicit crop box and a non-zero rotation.
            MockPageData {
                media_box: BBox::new(0.0, 0.0, 595.0, 842.0),
                crop_box: Some(BBox::new(10.0, 10.0, 585.0, 832.0)),
                rotate: 90,
            },
            // Page 1: different media box, no crop box, no rotation.
            MockPageData {
                media_box: BBox::new(0.0, 0.0, 842.0, 595.0),
                crop_box: None,
                rotate: 0,
            },
        ],
    };

    let page0 = MockBackend::get_page(&doc, 0).unwrap();

    let media_box0 = MockBackend::page_media_box(&doc, &page0).unwrap();
    assert_eq!(media_box0, BBox::new(0.0, 0.0, 595.0, 842.0));

    let crop_box0 = MockBackend::page_crop_box(&doc, &page0).unwrap();
    assert_eq!(crop_box0, Some(BBox::new(10.0, 10.0, 585.0, 832.0)));

    let rotate0 = MockBackend::page_rotate(&doc, &page0).unwrap();
    assert_eq!(rotate0, 90);

    let page1 = MockBackend::get_page(&doc, 1).unwrap();

    // Page 1 carries its own media box; checking it proves the lookup is
    // per-page and not simply returning page 0's value.
    let media_box1 = MockBackend::page_media_box(&doc, &page1).unwrap();
    assert_eq!(media_box1, BBox::new(0.0, 0.0, 842.0, 595.0));

    let crop_box1 = MockBackend::page_crop_box(&doc, &page1).unwrap();
    assert_eq!(crop_box1, None);

    let rotate1 = MockBackend::page_rotate(&doc, &page1).unwrap();
    assert_eq!(rotate1, 0);
}
460}