# Crate identity and registry metadata.
[package]
name = "pdf_oxide"
version = "0.3.5"
authors = ["Yury Fedoseev <yfedoseev@gmail.com>"]
categories = ["parsing", "text-processing"]
documentation = "https://docs.rs/pdf_oxide"
edition = "2021"
homepage = "https://github.com/yfedoseev/pdf_oxide"
keywords = ["pdf", "text-extraction", "pdf-creation", "pdf-editing", "parser"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/yfedoseev/pdf_oxide"
# Cargo's target auto-discovery is switched off for every target kind, so each
# lib/bin/example/test/bench target has to be declared explicitly in this file.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is run for this package.
build = false
description = "The Complete PDF Toolkit: extract, create, and edit PDFs. Rust core with bindings for Python, Node, WASM, Go, and more."
# docs.rs build settings: the crate features to enable when building the
# documentation, and extra flags handed to rustdoc (`--cfg docsrs`).
[package.metadata.docs.rs]
features = ["python", "logging", "debug-span-merging"]
rustdoc-args = ["--cfg", "docsrs"]
# Settings read by maturin (Python packaging of this crate).
# `python-source` names the directory holding the Python-side sources;
# `name` is the module name maturin uses — presumably the import name, confirm.
[package.metadata.maturin]
python-source = "python"
name = "pdf_oxide"
# Cargo features. Nothing is enabled by default; every optional dependency is
# reached through one of the named features below.
[features]
default = []

# Pulls in the optional `qrcode` and `barcoders` crates.
barcodes = ["dep:qrcode", "dep:barcoders"]

# Flag-only feature: enables no dependencies or other features.
debug-span-merging = []

# Everything in `ml`, plus the optional `ort` crate.
gpu = ["dep:ort", "ml"]

# Flag-only feature: enables no dependencies or other features.
logging = []

# Pulls in the optional ML crates listed here.
ml = [
    "dep:tract-onnx",
    "dep:ndarray",
    "dep:linfa",
    "dep:linfa-clustering",
    "dep:tokenizers",
]

# Pulls in `ort`, `imageproc` and `ndarray`.
ocr = ["dep:ort", "dep:imageproc", "dep:ndarray"]

# Pulls in `calamine` and `zip`.
office = ["dep:calamine", "dep:zip"]

# NOTE(review): unlike every other entry, `pyo3` is named without the `dep:`
# prefix, so Cargo also exposes an implicit `pyo3` feature. Switching to
# `dep:pyo3` would remove that feature name — check for users before changing.
python = ["pyo3"]

# Pulls in `tiny-skia`, `fontdb` and `rustybuzz`.
rendering = ["dep:tiny-skia", "dep:fontdb", "dep:rustybuzz"]

# Pulls in the optional certificate / signature crates listed here.
signatures = [
    "dep:x509-parser",
    "dep:cms",
    "dep:rsa",
    "dep:der",
    "dep:spki",
    "dep:pkcs1",
    "dep:pkcs8",
    "dep:signature",
    "dep:sha1",
]

# Everything in `ml`, plus `pdfium-render`.
table-ml = ["ml", "dep:pdfium-render"]

# Pulls in the wasm-bindgen family of crates plus `getrandom`.
wasm = [
    "dep:wasm-bindgen",
    "dep:web-sys",
    "dep:js-sys",
    "dep:console_error_panic_hook",
    "dep:getrandom",
]

# Convenience feature: `wasm` and `ml` together.
wasm-ml = ["wasm", "ml"]
# Library target. Built both as a dynamic system library (`cdylib`) and as a
# regular Rust library (`rlib`).
[lib]
name = "pdf_oxide"
path = "src/lib.rs"
crate-type = ["cdylib", "rlib"]
# Binary targets, one source file each under src/bin/. They are listed
# explicitly because `autobins = false` is set in [package].
[[bin]]
name = "analyze_gaps"
path = "src/bin/analyze_gaps.rs"

[[bin]]
name = "analyze_pdf_features"
path = "src/bin/analyze_pdf_features.rs"

[[bin]]
name = "benchmark_all_pdfs"
path = "src/bin/benchmark_all_pdfs.rs"

[[bin]]
name = "debug_extraction"
path = "src/bin/debug_extraction.rs"

[[bin]]
name = "export_to_html"
path = "src/bin/export_to_html.rs"

[[bin]]
name = "export_to_markdown"
path = "src/bin/export_to_markdown.rs"

[[bin]]
name = "export_to_text"
path = "src/bin/export_to_text.rs"

[[bin]]
name = "extract_with_pipeline"
path = "src/bin/extract_with_pipeline.rs"

[[bin]]
name = "extract_with_published_crate"
path = "src/bin/extract_with_published_crate.rs"

[[bin]]
name = "validate_content"
path = "src/bin/validate_content.rs"

[[bin]]
name = "validate_dataset"
path = "src/bin/validate_dataset.rs"

[[bin]]
name = "validate_structured"
path = "src/bin/validate_structured.rs"
# Example targets, one source file each under examples/. They are listed
# explicitly because `autoexamples = false` is set in [package].
# NOTE(review): no example declares `required-features`; examples that rely on
# optional functionality presumably gate themselves in source — confirm.
[[example]]
name = "create_pdf_from_markdown"
path = "examples/create_pdf_from_markdown.rs"

[[example]]
name = "create_pdf_with_images"
path = "examples/create_pdf_with_images.rs"

[[example]]
name = "debug_page_detection"
path = "examples/debug_page_detection.rs"

[[example]]
name = "debug_xobjects"
path = "examples/debug_xobjects.rs"

[[example]]
name = "debug_zero_chars"
path = "examples/debug_zero_chars.rs"

[[example]]
name = "edit_existing_pdf"
path = "examples/edit_existing_pdf.rs"

[[example]]
name = "edit_text_content"
path = "examples/edit_text_content.rs"

[[example]]
name = "extract_structured"
path = "examples/extract_structured.rs"

[[example]]
name = "extract_text_simple"
path = "examples/extract_text_simple.rs"

[[example]]
name = "inspect_char_codes"
path = "examples/inspect_char_codes.rs"

[[example]]
name = "ocr_scanned_pdf"
path = "examples/ocr_scanned_pdf.rs"

[[example]]
name = "test_pdf"
path = "examples/test_pdf.rs"

[[example]]
name = "test_spacing_fix"
path = "examples/test_spacing_fix.rs"

[[example]]
name = "verify_corpus"
path = "examples/verify_corpus.rs"
# Integration-test targets, one source file each under tests/, in alphabetical
# order. They are listed explicitly because `autotests = false` is set in
# [package]; a new file under tests/ is not picked up until it is added here.
[[test]]
name = "test_4byte_character_codes"
path = "tests/test_4byte_character_codes.rs"

[[test]]
name = "test_actualtext_extraction"
path = "tests/test_actualtext_extraction.rs"

[[test]]
name = "test_adaptive_threshold"
path = "tests/test_adaptive_threshold.rs"

[[test]]
name = "test_advanced_cmap_directives"
path = "tests/test_advanced_cmap_directives.rs"

[[test]]
name = "test_advanced_cmap_features"
path = "tests/test_advanced_cmap_features.rs"

[[test]]
name = "test_alice_wonderland"
path = "tests/test_alice_wonderland.rs"

[[test]]
name = "test_annotation_integration"
path = "tests/test_annotation_integration.rs"

[[test]]
name = "test_blank_pages"
path = "tests/test_blank_pages.rs"

[[test]]
name = "test_ccitt_decoder_diagnosis"
path = "tests/test_ccitt_decoder_diagnosis.rs"

[[test]]
name = "test_ccitt_edge_cases"
path = "tests/test_ccitt_edge_cases.rs"

[[test]]
name = "test_ccitt_params_extraction"
path = "tests/test_ccitt_params_extraction.rs"

[[test]]
name = "test_cff_font_support"
path = "tests/test_cff_font_support.rs"

[[test]]
name = "test_character_mapping_fixes"
path = "tests/test_character_mapping_fixes.rs"

[[test]]
name = "test_character_tracking_integration"
path = "tests/test_character_tracking_integration.rs"

[[test]]
name = "test_cid_to_gid_map"
path = "tests/test_cid_to_gid_map.rs"

[[test]]
name = "test_citation_detection"
path = "tests/test_citation_detection.rs"

[[test]]
name = "test_cjk_script_support"
path = "tests/test_cjk_script_support.rs"

[[test]]
name = "test_cmap_caching"
path = "tests/test_cmap_caching.rs"

[[test]]
name = "test_complex_script_support"
path = "tests/test_complex_script_support.rs"

[[test]]
name = "test_compliance"
path = "tests/test_compliance.rs"

[[test]]
name = "test_config_adapter"
path = "tests/test_config_adapter.rs"

[[test]]
name = "test_converters"
path = "tests/test_converters.rs"

[[test]]
name = "test_custom_encoding"
path = "tests/test_custom_encoding.rs"

[[test]]
name = "test_cyclic_page_tree"
path = "tests/test_cyclic_page_tree.rs"

[[test]]
name = "test_debug_visualization"
path = "tests/test_debug_visualization.rs"

[[test]]
name = "test_decoders"
path = "tests/test_decoders.rs"

[[test]]
name = "test_document_pipeline_integration"
path = "tests/test_document_pipeline_integration.rs"

[[test]]
name = "test_document_structure"
path = "tests/test_document_structure.rs"

[[test]]
name = "test_dom_api"
path = "tests/test_dom_api.rs"

[[test]]
name = "test_editor"
path = "tests/test_editor.rs"

[[test]]
name = "test_editor_form_fields"
path = "tests/test_editor_form_fields.rs"

[[test]]
name = "test_editor_hierarchical_fields"
path = "tests/test_editor_hierarchical_fields.rs"

[[test]]
name = "test_editor_modify_properties"
path = "tests/test_editor_modify_properties.rs"

[[test]]
name = "test_email_citation_detection"
path = "tests/test_email_citation_detection.rs"

[[test]]
name = "test_embedded_files"
path = "tests/test_embedded_files.rs"

[[test]]
name = "test_encryption_write"
path = "tests/test_encryption_write.rs"

[[test]]
name = "test_extended_ascii_character_mapping"
path = "tests/test_extended_ascii_character_mapping.rs"

[[test]]
name = "test_extract_chars_integration"
path = "tests/test_extract_chars_integration.rs"

[[test]]
name = "test_fdf_export"
path = "tests/test_fdf_export.rs"

[[test]]
name = "test_form_fields"
path = "tests/test_form_fields.rs"

[[test]]
name = "test_foundation"
path = "tests/test_foundation.rs"

[[test]]
name = "test_full_extraction"
path = "tests/test_full_extraction.rs"

[[test]]
name = "test_hierarchical_integration"
path = "tests/test_hierarchical_integration.rs"

[[test]]
name = "test_high_level_api"
path = "tests/test_high_level_api.rs"

[[test]]
name = "test_hyphenation_integration"
path = "tests/test_hyphenation_integration.rs"

[[test]]
name = "test_image_embedding"
path = "tests/test_image_embedding.rs"

[[test]]
name = "test_image_filters"
path = "tests/test_image_filters.rs"

[[test]]
name = "test_images"
path = "tests/test_images.rs"

[[test]]
name = "test_layout"
path = "tests/test_layout.rs"

[[test]]
name = "test_lazy_cmap_loading"
path = "tests/test_lazy_cmap_loading.rs"

[[test]]
name = "test_ligature_expansion"
path = "tests/test_ligature_expansion.rs"

[[test]]
name = "test_markdown_extraction_quality"
path = "tests/test_markdown_extraction_quality.rs"

[[test]]
name = "test_multibyte_cid_support"
path = "tests/test_multibyte_cid_support.rs"

[[test]]
name = "test_object_resolution"
path = "tests/test_object_resolution.rs"

[[test]]
name = "test_objstm"
path = "tests/test_objstm.rs"

[[test]]
name = "test_ocr"
path = "tests/test_ocr.rs"

[[test]]
name = "test_ocr_inference"
path = "tests/test_ocr_inference.rs"

[[test]]
name = "test_ocr_integration"
path = "tests/test_ocr_integration.rs"

[[test]]
name = "test_ocr_module"
path = "tests/test_ocr_module.rs"

[[test]]
name = "test_ocr_scanned_document"
path = "tests/test_ocr_scanned_document.rs"

[[test]]
name = "test_ocr_with_models"
path = "tests/test_ocr_with_models.rs"

[[test]]
name = "test_optimized_cmap_parsing"
path = "tests/test_optimized_cmap_parsing.rs"

[[test]]
name = "test_outline"
path = "tests/test_outline.rs"

[[test]]
name = "test_page_operations"
path = "tests/test_page_operations.rs"

[[test]]
name = "test_page_scanning_fallback"
path = "tests/test_page_scanning_fallback.rs"

[[test]]
name = "test_parent_tree_lookup"
path = "tests/test_parent_tree_lookup.rs"

[[test]]
name = "test_path_extraction"
path = "tests/test_path_extraction.rs"

[[test]]
name = "test_pattern_detection"
path = "tests/test_pattern_detection.rs"

[[test]]
name = "test_pdf_ccitt_params"
path = "tests/test_pdf_ccitt_params.rs"

[[test]]
name = "test_pdf_header_parsing"
path = "tests/test_pdf_header_parsing.rs"

[[test]]
name = "test_pipeline_html_converter"
path = "tests/test_pipeline_html_converter.rs"

[[test]]
name = "test_pipeline_integration"
path = "tests/test_pipeline_integration.rs"

[[test]]
name = "test_pipeline_markdown_converter"
path = "tests/test_pipeline_markdown_converter.rs"

[[test]]
name = "test_predefined_cmap_loading"
path = "tests/test_predefined_cmap_loading.rs"

[[test]]
name = "test_predefined_cmaps"
path = "tests/test_predefined_cmaps.rs"

[[test]]
name = "test_pride_prejudice"
path = "tests/test_pride_prejudice.rs"

[[test]]
name = "test_primary_detection_mode_impl"
path = "tests/test_primary_detection_mode_impl.rs"

[[test]]
name = "test_reading_order_priority"
path = "tests/test_reading_order_priority.rs"

[[test]]
name = "test_rendering"
path = "tests/test_rendering.rs"

[[test]]
name = "test_rtl_script_support"
path = "tests/test_rtl_script_support.rs"

[[test]]
name = "test_search"
path = "tests/test_search.rs"

[[test]]
name = "test_spacing_integration"
path = "tests/test_spacing_integration.rs"

[[test]]
name = "test_spacing_spec_compliant"
path = "tests/test_spacing_spec_compliant.rs"

[[test]]
name = "test_spec_compliance_fallback"
path = "tests/test_spec_compliance_fallback.rs"

[[test]]
name = "test_stream_filters"
path = "tests/test_stream_filters.rs"

[[test]]
name = "test_table_extraction"
path = "tests/test_table_extraction.rs"

[[test]]
name = "test_tables"
path = "tests/test_tables.rs"

[[test]]
name = "test_text_justification_alignment"
path = "tests/test_text_justification_alignment.rs"

[[test]]
name = "test_textchar_transformation"
path = "tests/test_textchar_transformation.rs"

[[test]]
name = "test_type0_agl_fallback"
path = "tests/test_type0_agl_fallback.rs"

[[test]]
name = "test_unicode_font_embedding"
path = "tests/test_unicode_font_embedding.rs"

[[test]]
name = "test_whitespace_normalization"
path = "tests/test_whitespace_normalization.rs"

[[test]]
name = "test_word_boundary_character_tracking"
path = "tests/test_word_boundary_character_tracking.rs"

[[test]]
name = "test_word_boundary_detection"
path = "tests/test_word_boundary_detection.rs"

[[test]]
name = "test_word_boundary_integration"
path = "tests/test_word_boundary_integration.rs"

[[test]]
name = "test_word_boundary_mode_branching"
path = "tests/test_word_boundary_mode_branching.rs"

[[test]]
name = "test_word_boundary_mode_config"
path = "tests/test_word_boundary_mode_config.rs"

[[test]]
name = "test_word_boundary_performance"
path = "tests/test_word_boundary_performance.rs"

[[test]]
name = "test_xobject_path_extraction"
path = "tests/test_xobject_path_extraction.rs"

[[test]]
name = "test_zero_byte_font_handling"
path = "tests/test_zero_byte_font_handling.rs"
# Benchmark targets, one source file each under benches/. They are listed
# explicitly because `autobenches = false` is set in [package].
# `harness = false` turns off the built-in libtest harness, so each bench file
# must supply its own entry point — presumably via the `criterion`
# dev-dependency; confirm against the bench sources.
[[bench]]
name = "cjk_benchmarks"
path = "benches/cjk_benchmarks.rs"
harness = false

[[bench]]
name = "complex_script_benchmarks"
path = "benches/complex_script_benchmarks.rs"
harness = false

[[bench]]
name = "full_pipeline_benchmarks"
path = "benches/full_pipeline_benchmarks.rs"
harness = false

[[bench]]
name = "ligature_benchmarks"
path = "benches/ligature_benchmarks.rs"
harness = false

[[bench]]
name = "pdf_extraction_performance"
path = "benches/pdf_extraction_performance.rs"
harness = false

[[bench]]
name = "rtl_benchmarks"
path = "benches/rtl_benchmarks.rs"
harness = false

[[bench]]
name = "script_detection_benchmarks"
path = "benches/script_detection_benchmarks.rs"
harness = false

[[bench]]
name = "word_boundary_benchmarks"
path = "benches/word_boundary_benchmarks.rs"
harness = false
# Runtime dependencies, alphabetical. Entries marked `optional = true` are only
# compiled when a feature in [features] names them.
# A bare string is Cargo's shorthand for `{ version = "..." }`.
# NOTE(review): several always-on crates look like they overlap
# (flate2 / inflate / libflate, and lzw / weezl) — confirm each is still used.
[dependencies]
aes = "0.8"
barcoders = { version = "2.0", features = ["image"], optional = true }
base64 = "0.22"
bitflags = "2"
byteorder = "1.5"
bytes = "1.5"
calamine = { version = "0.26", optional = true }
cbc = "0.1"
chrono = "0.4"
cms = { version = "0.2", optional = true }
console_error_panic_hook = { version = "0.1", optional = true }
der = { version = "0.7", optional = true }
env_logger = "0.11"
fax = "0.2"
flate2 = "1.0"
fontdb = { version = "0.16", optional = true }
# NOTE(review): only reachable through the `wasm` feature, with no crate
# features enabled. getrandom 0.3 documents a `wasm_js` feature for browser
# targets — confirm whether it is needed here.
getrandom = { version = "0.3", optional = true }
image = { version = "0.24", default-features = false, features = ["png", "jpeg", "tiff"] }
# NOTE(review): imageproc 0.25 may be built against a newer `image` release
# than the 0.24 requested above — check the lockfile for duplicate versions.
imageproc = { version = "0.25", optional = true }
indexmap = "2.2"
inflate = "0.4"
js-sys = { version = "0.3", optional = true }
lazy_static = "1.4"
libflate = "2.1"
linfa = { version = "0.7", optional = true }
linfa-clustering = { version = "0.7", optional = true }
log = "0.4"
lzw = "0.10.0"
md-5 = "0.10"
ndarray = { version = "0.17", features = ["std"], optional = true }
nom = "8.0"
ort = { version = "2.0.0-rc.10", default-features = false, features = ["ndarray", "download-binaries"], optional = true }
pdfium-render = { version = "0.8", optional = true }
phf = { version = "0.11", features = ["macros"] }
pkcs1 = { version = "0.7", optional = true }
pkcs8 = { version = "0.10", optional = true }
pyo3 = { version = "0.27", features = ["abi3-py38"], optional = true }
qrcode = { version = "0.14", optional = true }
quick-xml = "0.31"
regex = "1.10"
rsa = { version = "0.9", optional = true }
rustybuzz = { version = "0.14", optional = true }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sha1 = { version = "0.10", optional = true }
sha2 = "0.10"
signature = { version = "2.2", optional = true }
spki = { version = "0.7", optional = true }
stringprep = "0.1"
thiserror = "1.0"
tiff = "0.11"
tiny-skia = { version = "0.11", optional = true }
tokenizers = { version = "0.15", default-features = false, features = ["onig"], optional = true }
tract-onnx = { version = "0.21", optional = true }
ttf-parser = "0.24"
uuid = { version = "1.0", features = ["v4"] }
wasm-bindgen = { version = "0.2", optional = true }
web-sys = { version = "0.3", features = ["console"], optional = true }
weezl = "0.1"
x509-parser = { version = "0.18", optional = true }
zip = { version = "7.0", default-features = false, features = ["deflate"], optional = true }
# Dependencies used only by tests, examples and benchmarks.
# `regex` repeats the version already required under the regular dependencies.
[dev-dependencies]
crc32fast = "1.3"
criterion = "0.8"
proptest = "1.4"
regex = "1.10"
tempfile = "3.10"
# Benchmark builds: maximum optimization level with link-time optimization.
[profile.bench]
lto = true
opt-level = 3
# Release builds: maximum optimization level, link-time optimization, a single
# codegen unit, and stripped output.
[profile.release]
codegen-units = 1
lto = true
opt-level = 3
strip = true
# Custom profile layered on `release`, selected with `--profile release-small`.
[profile.release-small]
inherits = "release"
# Settings that differ from `release`: size-oriented optimization level and
# abort-on-panic.
opt-level = "z"
panic = "abort"
# Restated explicitly; these match the values `release` sets in this file.
codegen-units = 1
lto = true
strip = true