1use crate::parser::content::{ContentOperation, ContentParser};
51use crate::parser::{ParseError, PdfDocument};
52use std::fmt;
53
/// Direction classification assigned to an extracted line segment.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LineOrientation {
    /// The segment runs (within tolerance) along the x axis.
    Horizontal,
    /// The segment runs (within tolerance) along the y axis.
    Vertical,
    /// The segment is neither horizontal nor vertical.
    Diagonal,
}
64
65#[derive(Debug, Clone, PartialEq)]
67pub struct VectorLine {
68 pub x1: f64,
70 pub y1: f64,
72 pub x2: f64,
74 pub y2: f64,
76 pub orientation: LineOrientation,
78 pub stroke_width: f64,
80 pub is_stroked: bool,
82 pub color: Option<crate::graphics::Color>,
84}
85
86impl VectorLine {
87 pub fn new(
101 x1: f64,
102 y1: f64,
103 x2: f64,
104 y2: f64,
105 stroke_width: f64,
106 is_stroked: bool,
107 color: Option<crate::graphics::Color>,
108 ) -> Self {
109 let orientation = Self::compute_orientation(x1, y1, x2, y2);
110 Self {
111 x1,
112 y1,
113 x2,
114 y2,
115 orientation,
116 stroke_width,
117 is_stroked,
118 color,
119 }
120 }
121
122 fn compute_orientation(x1: f64, y1: f64, x2: f64, y2: f64) -> LineOrientation {
126 const TOLERANCE: f64 = 0.1;
127
128 let dx = (x2 - x1).abs();
129 let dy = (y2 - y1).abs();
130
131 if dy < TOLERANCE {
132 LineOrientation::Horizontal
133 } else if dx < TOLERANCE {
134 LineOrientation::Vertical
135 } else {
136 LineOrientation::Diagonal
137 }
138 }
139
140 pub fn length(&self) -> f64 {
142 let dx = self.x2 - self.x1;
143 let dy = self.y2 - self.y1;
144 (dx * dx + dy * dy).sqrt()
145 }
146
147 pub fn midpoint(&self) -> (f64, f64) {
149 ((self.x1 + self.x2) / 2.0, (self.y1 + self.y2) / 2.0)
150 }
151}
152
153#[derive(Debug, Clone, Default)]
155pub struct ExtractedGraphics {
156 pub lines: Vec<VectorLine>,
158 pub horizontal_count: usize,
160 pub vertical_count: usize,
162}
163
164impl ExtractedGraphics {
165 pub fn new() -> Self {
167 Self::default()
168 }
169
170 pub fn add_line(&mut self, line: VectorLine) {
172 match line.orientation {
173 LineOrientation::Horizontal => self.horizontal_count += 1,
174 LineOrientation::Vertical => self.vertical_count += 1,
175 LineOrientation::Diagonal => {} }
177 self.lines.push(line);
178 }
179
180 pub fn horizontal_lines(&self) -> impl Iterator<Item = &VectorLine> {
182 self.lines
183 .iter()
184 .filter(|l| l.orientation == LineOrientation::Horizontal)
185 }
186
187 pub fn vertical_lines(&self) -> impl Iterator<Item = &VectorLine> {
189 self.lines
190 .iter()
191 .filter(|l| l.orientation == LineOrientation::Vertical)
192 }
193
194 pub fn has_table_structure(&self) -> bool {
198 self.horizontal_count >= 2 && self.vertical_count >= 2
199 }
200}
201
/// Tunable filters used while extracting lines from a page.
#[derive(Clone, Debug)]
pub struct ExtractionConfig {
    /// Minimum length a path segment must have to be kept.
    pub min_line_length: f64,
    /// When `false`, diagonal path segments are discarded.
    pub extract_diagonals: bool,
    /// When `true`, paths painted by a fill operator are not extracted.
    pub stroked_only: bool,
}

impl Default for ExtractionConfig {
    /// Defaults: minimum length `1.0`, diagonals skipped, stroked paths only.
    fn default() -> Self {
        let min_line_length = 1.0;
        let extract_diagonals = false;
        let stroked_only = true;

        Self {
            min_line_length,
            extract_diagonals,
            stroked_only,
        }
    }
}
222
223pub struct GraphicsExtractor {
225 config: ExtractionConfig,
226}
227
228impl GraphicsExtractor {
229 pub fn new(config: ExtractionConfig) -> Self {
231 Self { config }
232 }
233
234 pub fn default() -> Self {
236 Self::new(ExtractionConfig::default())
237 }
238
239 pub fn config(&self) -> &ExtractionConfig {
241 &self.config
242 }
243
244 pub fn extract_from_page<R: std::io::Read + std::io::Seek>(
259 &mut self,
260 document: &PdfDocument<R>,
261 page_index: usize,
262 ) -> Result<ExtractedGraphics, ExtractionError> {
263 let page = document
265 .get_page(page_index as u32)
266 .map_err(|e| ExtractionError::ParseError(format!("Failed to get page: {}", e)))?;
267
268 let streams = document
270 .get_page_content_streams(&page)
271 .map_err(|e| ExtractionError::ParseError(format!("Failed to get content: {}", e)))?;
272
273 let mut graphics = ExtractedGraphics::new();
274 let mut state = GraphicsState::new();
275
276 for stream in streams {
278 let operations = ContentParser::parse(&stream).map_err(|e| {
279 ExtractionError::ParseError(format!("Failed to parse content: {}", e))
280 })?;
281
282 self.process_operations(&operations, &mut state, &mut graphics)?;
283 }
284
285 Ok(graphics)
286 }
287
288 fn process_operations(
290 &self,
291 operations: &[ContentOperation],
292 state: &mut GraphicsState,
293 graphics: &mut ExtractedGraphics,
294 ) -> Result<(), ExtractionError> {
295 for op in operations {
296 match op {
297 ContentOperation::SaveGraphicsState => state.save(),
299 ContentOperation::RestoreGraphicsState => state.restore(),
300 ContentOperation::SetLineWidth(w) => state.stroke_width = *w as f64,
301 ContentOperation::SetTransformMatrix(a, b, c, d, e, f) => {
302 state.apply_transform(
303 *a as f64, *b as f64, *c as f64, *d as f64, *e as f64, *f as f64,
304 );
305 }
306
307 ContentOperation::SetStrokingGray(gray) => {
309 state.stroke_color = Some(crate::graphics::Color::gray(*gray as f64));
310 }
311 ContentOperation::SetStrokingRGB(r, g, b) => {
312 state.stroke_color =
313 Some(crate::graphics::Color::rgb(*r as f64, *g as f64, *b as f64));
314 }
315 ContentOperation::SetStrokingCMYK(c, m, y, k) => {
316 state.stroke_color = Some(crate::graphics::Color::cmyk(
317 *c as f64, *m as f64, *y as f64, *k as f64,
318 ));
319 }
320
321 ContentOperation::MoveTo(x, y) => {
323 let (tx, ty) = state.transform_point(*x as f64, *y as f64);
324 state.move_to(tx, ty);
325 }
326 ContentOperation::LineTo(x, y) => {
327 let (tx, ty) = state.transform_point(*x as f64, *y as f64);
328 state.line_to(tx, ty);
329 }
330 ContentOperation::Rectangle(x, y, width, height) => {
331 self.extract_rectangle_lines(
332 *x as f64,
333 *y as f64,
334 *width as f64,
335 *height as f64,
336 state,
337 graphics,
338 );
339 }
340 ContentOperation::ClosePath => {
341 state.close_path();
342 }
343
344 ContentOperation::Stroke | ContentOperation::CloseStroke => {
346 self.extract_path_lines(state, graphics, true);
347 state.clear_path();
348 }
349 ContentOperation::Fill | ContentOperation::FillEvenOdd => {
350 if !self.config.stroked_only {
351 self.extract_path_lines(state, graphics, false);
352 }
353 state.clear_path();
354 }
355
356 _ => {} }
358 }
359
360 Ok(())
361 }
362
363 fn extract_rectangle_lines(
367 &self,
368 x: f64,
369 y: f64,
370 width: f64,
371 height: f64,
372 state: &GraphicsState,
373 graphics: &mut ExtractedGraphics,
374 ) {
375 let stroke_width = state.stroke_width;
376
377 let (x1, y1) = state.transform_point(x, y); let (x2, y2) = state.transform_point(x + width, y); let (x3, y3) = state.transform_point(x + width, y + height); let (x4, y4) = state.transform_point(x, y + height); graphics.add_line(VectorLine::new(x1, y1, x2, y2, stroke_width, true, None));
385
386 graphics.add_line(VectorLine::new(x2, y2, x3, y3, stroke_width, true, None));
388
389 graphics.add_line(VectorLine::new(x3, y3, x4, y4, stroke_width, true, None));
391
392 graphics.add_line(VectorLine::new(x4, y4, x1, y1, stroke_width, true, None));
394 }
395
396 fn extract_path_lines(
398 &self,
399 state: &GraphicsState,
400 graphics: &mut ExtractedGraphics,
401 is_stroked: bool,
402 ) {
403 let stroke_width = state.stroke_width;
404
405 for segment in &state.path {
406 let PathSegment::Line { x1, y1, x2, y2 } = segment;
407 let line = VectorLine::new(
408 *x1,
409 *y1,
410 *x2,
411 *y2,
412 stroke_width,
413 is_stroked,
414 state.stroke_color,
415 );
416
417 if self.config.stroked_only && !is_stroked {
419 continue;
420 }
421
422 if line.length() < self.config.min_line_length {
423 continue;
424 }
425
426 if !self.config.extract_diagonals && line.orientation == LineOrientation::Diagonal {
427 continue;
428 }
429
430 graphics.add_line(line);
431 }
432 }
433}
434
435struct GraphicsState {
437 ctm: [f64; 6],
439 stroke_width: f64,
441 stroke_color: Option<crate::graphics::Color>,
443 path: Vec<PathSegment>,
445 current_point: Option<(f64, f64)>,
447 state_stack: Vec<SavedState>,
449}
450
451#[derive(Clone)]
453struct SavedState {
454 ctm: [f64; 6],
455 stroke_width: f64,
456 stroke_color: Option<crate::graphics::Color>,
457}
458
459#[derive(Debug, Clone)]
461enum PathSegment {
462 Line { x1: f64, y1: f64, x2: f64, y2: f64 },
463}
464
465impl GraphicsState {
466 fn new() -> Self {
467 Self {
468 ctm: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0], stroke_width: 1.0,
470 stroke_color: None,
471 path: Vec::new(),
472 current_point: None,
473 state_stack: Vec::new(),
474 }
475 }
476
477 fn save(&mut self) {
478 self.state_stack.push(SavedState {
479 ctm: self.ctm,
480 stroke_width: self.stroke_width,
481 stroke_color: self.stroke_color,
482 });
483 }
484
485 fn restore(&mut self) {
486 if let Some(saved) = self.state_stack.pop() {
487 self.ctm = saved.ctm;
488 self.stroke_width = saved.stroke_width;
489 self.stroke_color = saved.stroke_color;
490 }
491 }
492
493 fn apply_transform(&mut self, a: f64, b: f64, c: f64, d: f64, e: f64, f: f64) {
494 let [a0, b0, c0, d0, e0, f0] = self.ctm;
495 self.ctm = [
496 a * a0 + b * c0,
497 a * b0 + b * d0,
498 c * a0 + d * c0,
499 c * b0 + d * d0,
500 e * a0 + f * c0 + e0,
501 e * b0 + f * d0 + f0,
502 ];
503 }
504
505 fn transform_point(&self, x: f64, y: f64) -> (f64, f64) {
509 let [a, b, c, d, e, f] = self.ctm;
510 let tx = a * x + c * y + e;
511 let ty = b * x + d * y + f;
512 (tx, ty)
513 }
514
515 fn move_to(&mut self, x: f64, y: f64) {
516 self.current_point = Some((x, y));
517 }
518
519 fn line_to(&mut self, x: f64, y: f64) {
520 if let Some((x1, y1)) = self.current_point {
521 self.path.push(PathSegment::Line {
522 x1,
523 y1,
524 x2: x,
525 y2: y,
526 });
527 self.current_point = Some((x, y));
528 }
529 }
530
531 fn close_path(&mut self) {
532 if let Some((start_x, start_y)) = self.path.first().map(|seg| match seg {
534 PathSegment::Line { x1, y1, .. } => (*x1, *y1),
535 }) {
536 if let Some((x, y)) = self.current_point {
537 const EPSILON: f64 = 0.01;
539 if (x - start_x).abs() > EPSILON || (y - start_y).abs() > EPSILON {
540 self.path.push(PathSegment::Line {
541 x1: x,
542 y1: y,
543 x2: start_x,
544 y2: start_y,
545 });
546 self.current_point = Some((start_x, start_y));
547 }
548 }
549 }
550 }
551
552 fn clear_path(&mut self) {
553 self.path.clear();
554 self.current_point = None;
555 }
556}
557
/// Errors reported by the graphics extraction API.
#[derive(Debug)]
pub enum ExtractionError {
    /// A graphics operator was rejected; the payload names it.
    InvalidOperator(String),
    /// An operand was rejected; the payload describes the problem.
    InvalidOperand(String),
    /// An underlying I/O failure.
    IoError(std::io::Error),
    /// The document, page or content stream could not be parsed or loaded.
    ParseError(String),
}

impl fmt::Display for ExtractionError {
    /// Renders a short category prefix followed by the variant's detail.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ExtractionError::InvalidOperator(op) => {
                f.write_fmt(format_args!("Invalid graphics operator: {}", op))
            }
            ExtractionError::InvalidOperand(msg) => {
                f.write_fmt(format_args!("Invalid operand: {}", msg))
            }
            ExtractionError::IoError(e) => f.write_fmt(format_args!("I/O error: {}", e)),
            ExtractionError::ParseError(msg) => f.write_fmt(format_args!("Parse error: {}", msg)),
        }
    }
}

impl std::error::Error for ExtractionError {
    /// Exposes the wrapped I/O error as the source; other variants have none.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        if let ExtractionError::IoError(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}

impl From<std::io::Error> for ExtractionError {
    /// Wraps an I/O error so it can be propagated with `?`.
    fn from(err: std::io::Error) -> Self {
        ExtractionError::IoError(err)
    }
}
596
597impl From<ParseError> for ExtractionError {
598 fn from(err: ParseError) -> Self {
599 Self::ParseError(format!("{}", err))
600 }
601}
602
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a stroked, colourless line of width 1.0 between two points.
    fn unit_line(x1: f64, y1: f64, x2: f64, y2: f64) -> VectorLine {
        VectorLine::new(x1, y1, x2, y2, 1.0, true, None)
    }

    /// Asserts that `actual` lies strictly within `tolerance` of `expected`.
    fn assert_close(actual: f64, expected: f64, tolerance: f64) {
        assert!(
            (actual - expected).abs() < tolerance,
            "{} is not within {} of {}",
            actual,
            tolerance,
            expected
        );
    }

    /// Asserts that the state maps `point` onto `expected` (tolerance 0.001).
    fn assert_maps_to(state: &GraphicsState, point: (f64, f64), expected: (f64, f64)) {
        let (tx, ty) = state.transform_point(point.0, point.1);
        assert_close(tx, expected.0, 0.001);
        assert_close(ty, expected.1, 0.001);
    }

    #[test]
    fn test_line_orientation_horizontal() {
        let orientation = unit_line(100.0, 200.0, 300.0, 200.0).orientation;
        assert_eq!(orientation, LineOrientation::Horizontal);
    }

    #[test]
    fn test_line_orientation_vertical() {
        let orientation = unit_line(100.0, 200.0, 100.0, 400.0).orientation;
        assert_eq!(orientation, LineOrientation::Vertical);
    }

    #[test]
    fn test_line_orientation_diagonal() {
        let orientation = unit_line(100.0, 200.0, 300.0, 400.0).orientation;
        assert_eq!(orientation, LineOrientation::Diagonal);
    }

    #[test]
    fn test_line_orientation_tolerance() {
        // A 0.05 deviation stays inside the orientation tolerance.
        let nearly_flat = unit_line(100.0, 200.0, 300.0, 200.05);
        assert_eq!(nearly_flat.orientation, LineOrientation::Horizontal);

        let nearly_upright = unit_line(100.0, 200.0, 100.05, 400.0);
        assert_eq!(nearly_upright.orientation, LineOrientation::Vertical);
    }

    #[test]
    fn test_line_length() {
        // 3-4-5 right triangle.
        assert_close(unit_line(0.0, 0.0, 3.0, 4.0).length(), 5.0, 0.001);
    }

    #[test]
    fn test_line_midpoint() {
        let (mx, my) = unit_line(100.0, 200.0, 300.0, 400.0).midpoint();
        assert_close(mx, 200.0, 0.001);
        assert_close(my, 300.0, 0.001);
    }

    #[test]
    fn test_extracted_graphics_add_line() {
        let mut graphics = ExtractedGraphics::new();

        graphics.add_line(unit_line(0.0, 0.0, 100.0, 0.0)); // horizontal
        graphics.add_line(unit_line(0.0, 0.0, 0.0, 100.0)); // vertical
        graphics.add_line(unit_line(0.0, 0.0, 100.0, 100.0)); // diagonal

        assert_eq!(graphics.horizontal_count, 1);
        assert_eq!(graphics.vertical_count, 1);
        assert_eq!(graphics.lines.len(), 3);
    }

    #[test]
    fn test_extracted_graphics_iterators() {
        let mut graphics = ExtractedGraphics::new();

        graphics.add_line(unit_line(0.0, 0.0, 100.0, 0.0)); // horizontal
        graphics.add_line(unit_line(0.0, 0.0, 0.0, 100.0)); // vertical
        graphics.add_line(unit_line(0.0, 100.0, 100.0, 100.0)); // horizontal

        assert_eq!(graphics.horizontal_lines().count(), 2);
        assert_eq!(graphics.vertical_lines().count(), 1);
    }

    #[test]
    fn test_has_table_structure() {
        let mut graphics = ExtractedGraphics::new();

        // Nothing recorded yet.
        assert!(!graphics.has_table_structure());

        // Two horizontals and a single vertical are not enough.
        graphics.add_line(unit_line(0.0, 0.0, 100.0, 0.0));
        graphics.add_line(unit_line(0.0, 100.0, 100.0, 100.0));
        graphics.add_line(unit_line(0.0, 0.0, 0.0, 100.0));
        assert!(!graphics.has_table_structure());

        // The second vertical completes the grid.
        graphics.add_line(unit_line(100.0, 0.0, 100.0, 100.0));
        assert!(graphics.has_table_structure());
    }

    #[test]
    fn test_extraction_config_default() {
        let config = ExtractionConfig::default();
        assert_eq!(config.min_line_length, 1.0);
        assert!(!config.extract_diagonals);
        assert!(config.stroked_only);
    }

    #[test]
    fn test_ctm_transform_point_identity() {
        let state = GraphicsState::new();
        assert_maps_to(&state, (100.0, 200.0), (100.0, 200.0));
    }

    #[test]
    fn test_ctm_transform_point_translation() {
        let mut state = GraphicsState::new();
        state.apply_transform(1.0, 0.0, 0.0, 1.0, 50.0, 75.0);

        assert_maps_to(&state, (100.0, 200.0), (150.0, 275.0));
    }

    #[test]
    fn test_ctm_transform_point_scale() {
        let mut state = GraphicsState::new();
        state.apply_transform(2.0, 0.0, 0.0, 2.0, 0.0, 0.0);

        assert_maps_to(&state, (100.0, 200.0), (200.0, 400.0));
    }

    #[test]
    fn test_ctm_transform_point_combined() {
        let mut state = GraphicsState::new();
        state.apply_transform(2.0, 0.0, 0.0, 2.0, 10.0, 20.0);

        assert_maps_to(&state, (5.0, 5.0), (20.0, 30.0));
    }

    #[test]
    fn test_graphics_state_save_restore() {
        let mut state = GraphicsState::new();
        state.stroke_width = 2.0;
        state.apply_transform(2.0, 0.0, 0.0, 2.0, 10.0, 20.0);

        state.save();
        state.stroke_width = 5.0;
        state.apply_transform(1.0, 0.0, 0.0, 1.0, 50.0, 50.0);

        state.restore();
        assert_eq!(state.stroke_width, 2.0);

        // The matrix must be back to the one saved above.
        assert_maps_to(&state, (5.0, 5.0), (20.0, 30.0));
    }

    #[test]
    fn test_graphics_state_nested_save_restore() {
        let mut state = GraphicsState::new();
        state.stroke_width = 2.0;

        state.save();
        state.stroke_width = 5.0;

        state.save();
        state.stroke_width = 10.0;

        state.restore();
        assert_eq!(state.stroke_width, 5.0);

        state.restore();
        assert_eq!(state.stroke_width, 2.0);

        // Restoring with an empty stack leaves the state untouched.
        state.restore();
        assert_eq!(state.stroke_width, 2.0);
    }

    #[test]
    fn test_close_path_creates_closing_line() {
        let mut state = GraphicsState::new();

        // Two sides of a triangle, then close.
        state.move_to(100.0, 100.0);
        state.line_to(200.0, 100.0);
        state.line_to(200.0, 200.0);
        state.close_path();

        assert_eq!(state.path.len(), 3);

        // The third segment must lead back to the starting point.
        let PathSegment::Line { x1, y1, x2, y2 } = &state.path[2];
        assert_close(*x1, 200.0, 0.01);
        assert_close(*y1, 200.0, 0.01);
        assert_close(*x2, 100.0, 0.01);
        assert_close(*y2, 100.0, 0.01);
    }

    #[test]
    fn test_close_path_no_duplicate_if_already_closed() {
        let mut state = GraphicsState::new();

        // A square whose last segment already returns to the start.
        state.move_to(100.0, 100.0);
        state.line_to(200.0, 100.0);
        state.line_to(200.0, 200.0);
        state.line_to(100.0, 200.0);
        state.line_to(100.0, 100.0);
        state.close_path();

        assert_eq!(state.path.len(), 4);
    }
}