Skip to main content

pdfplumber_core/
lib.rs

1//! Backend-independent data types and algorithms for pdfplumber-rs.
2//!
3//! This crate provides the foundational types ([`BBox`], [`Char`], [`Word`],
4//! [`Line`], [`Rect`], [`Table`], etc.) and algorithms (text grouping, table
5//! detection) used by pdfplumber-rs. It has no required external dependencies —
6//! all functionality is pure Rust.
7//!
8//! # Modules
9//!
10//! - [`geometry`] — Geometric primitives: [`Point`], [`BBox`], [`Ctm`], [`Orientation`]
11//! - [`text`] — Character data: [`Char`], [`TextDirection`], CJK detection
12//! - [`words`] — Word extraction: [`Word`], [`WordExtractor`], [`WordOptions`]
13//! - [`layout`] — Text layout: [`TextLine`], [`TextBlock`], [`TextOptions`]
14//! - [`shapes`] — Shapes from painted paths: [`Line`], [`Rect`], [`Curve`]
15//! - [`edges`] — Edge derivation for table detection: [`Edge`], [`EdgeSource`]
16//! - [`table`] — Table detection: [`Table`], [`TableFinder`], [`TableSettings`]
17//! - [`images`] — Image extraction: [`Image`], [`ImageMetadata`]
18//! - [`painting`] — Graphics state: [`Color`], [`GraphicsState`], [`PaintedPath`]
19//! - [`path`] — Path construction: [`Path`], [`PathBuilder`], [`PathSegment`]
20//! - [`encoding`] — Font encoding: [`FontEncoding`], [`EncodingResolver`]
21//! - [`error`] — Errors and warnings: [`PdfError`], [`ExtractWarning`], [`ExtractOptions`]
22
23#![deny(missing_docs)]
24
25/// Edge derivation from geometric primitives for table detection: [`Edge`], [`EdgeSource`].
26pub mod edges;
27/// Font encoding mapping (Standard, Windows, Mac, Custom): [`FontEncoding`], [`EncodingResolver`].
28pub mod encoding;
29/// Error and warning types for PDF processing: [`PdfError`], [`ExtractWarning`], [`ExtractOptions`].
30pub mod error;
31/// Geometric primitives: [`Point`], [`BBox`], [`Ctm`], [`Orientation`].
32pub mod geometry;
33/// Image extraction and metadata: [`Image`], [`ImageMetadata`].
34pub mod images;
35/// Text layout: words → lines → blocks, reading order, text output ([`TextLine`], [`TextBlock`]).
36pub mod layout;
37/// Graphics state, colors, dash patterns, and painted paths: [`GraphicsState`], [`PaintedPath`].
38pub mod painting;
39/// PDF path construction (MoveTo, LineTo, CurveTo, ClosePath): [`Path`], [`PathBuilder`].
40pub mod path;
41/// Shape extraction from painted paths: [`Line`], [`Rect`], [`Curve`].
42pub mod shapes;
43/// Table detection (lattice, stream, and explicit strategies): [`Table`], [`TableFinder`].
44pub mod table;
45/// Character data types and CJK detection: [`Char`], [`TextDirection`], [`is_cjk`].
46pub mod text;
47/// Word extraction from characters based on spatial proximity: [`Word`], [`WordExtractor`].
48pub mod words;
49
50pub use edges::{Edge, EdgeSource, derive_edges, edge_from_curve, edge_from_line, edges_from_rect};
51pub use encoding::{EncodingResolver, FontEncoding, StandardEncoding};
52pub use error::{ExtractOptions, ExtractResult, ExtractWarning, PdfError};
53pub use geometry::{BBox, Ctm, Orientation, Point};
54pub use images::{Image, ImageMetadata, image_from_ctm};
55pub use layout::{
56    TextBlock, TextLine, TextOptions, blocks_to_text, cluster_lines_into_blocks,
57    cluster_words_into_lines, sort_blocks_reading_order, split_lines_at_columns, words_to_text,
58};
59pub use painting::{Color, DashPattern, ExtGState, FillRule, GraphicsState, PaintedPath};
60pub use path::{Path, PathBuilder, PathSegment};
61pub use shapes::{Curve, Line, LineOrientation, Rect, extract_shapes};
62pub use table::{
63    Cell, ExplicitLines, Intersection, Strategy, Table, TableFinder, TableSettings,
64    cells_to_tables, edges_to_intersections, explicit_lines_to_edges, extract_text_for_cells,
65    intersections_to_cells, join_edge_group, snap_edges, words_to_edges_stream,
66};
67pub use text::{Char, TextDirection, is_cjk, is_cjk_text};
68pub use words::{Word, WordExtractor, WordOptions};