# Benchmark targets. Every entry sets `harness = false`, which turns off the
# built-in libtest bench harness, so each file has to provide its own `main`
# (presumably through Criterion, which is listed under dev-dependencies).

[[bench]]
name = "cjk_benchmarks"
path = "benches/cjk_benchmarks.rs"
harness = false

[[bench]]
name = "complex_script_benchmarks"
path = "benches/complex_script_benchmarks.rs"
harness = false

[[bench]]
name = "full_pipeline_benchmarks"
path = "benches/full_pipeline_benchmarks.rs"
harness = false

[[bench]]
name = "ligature_benchmarks"
path = "benches/ligature_benchmarks.rs"
harness = false

[[bench]]
name = "pdf_extraction_performance"
path = "benches/pdf_extraction_performance.rs"
harness = false

[[bench]]
name = "rtl_benchmarks"
path = "benches/rtl_benchmarks.rs"
harness = false

[[bench]]
name = "script_detection_benchmarks"
path = "benches/script_detection_benchmarks.rs"
harness = false

[[bench]]
name = "word_boundary_benchmarks"
path = "benches/word_boundary_benchmarks.rs"
harness = false
# Binary targets under src/bin. They are spelled out one by one because the
# package sets `autobins = false`, which disables automatic discovery.

[[bin]]
name = "analyze_gaps"
path = "src/bin/analyze_gaps.rs"

[[bin]]
name = "analyze_pdf_features"
path = "src/bin/analyze_pdf_features.rs"

[[bin]]
name = "benchmark_all_pdfs"
path = "src/bin/benchmark_all_pdfs.rs"

[[bin]]
name = "debug_extraction"
path = "src/bin/debug_extraction.rs"

[[bin]]
name = "export_to_html"
path = "src/bin/export_to_html.rs"

[[bin]]
name = "export_to_markdown"
path = "src/bin/export_to_markdown.rs"

[[bin]]
name = "export_to_text"
path = "src/bin/export_to_text.rs"

[[bin]]
name = "extract_with_pipeline"
path = "src/bin/extract_with_pipeline.rs"

[[bin]]
name = "extract_with_published_crate"
path = "src/bin/extract_with_published_crate.rs"

[[bin]]
name = "validate_content"
path = "src/bin/validate_content.rs"

[[bin]]
name = "validate_dataset"
path = "src/bin/validate_dataset.rs"

[[bin]]
name = "validate_structured"
path = "src/bin/validate_structured.rs"
# Runtime dependencies, kept in alphabetical order.
# A bare string is shorthand for `{ version = "..." }`. Entries marked
# `optional = true` are only built when an entry in the [features] table
# pulls them in.
[dependencies]
aes = "0.8"
barcoders = { version = "2.0", features = ["image"], optional = true }
base64 = "0.22"
bitflags = "2"
byteorder = "1.5"
bytes = "1.5"
calamine = { version = "0.26", optional = true }
cbc = "0.1"
chrono = "0.4"
cms = { version = "0.2", optional = true }
console_error_panic_hook = { version = "0.1", optional = true }
der = { version = "0.7", optional = true }
env_logger = "0.11"
fax = "0.2"
flate2 = "1.0"
fontdb = { version = "0.16", optional = true }
getrandom = { version = "0.2", features = ["js"], optional = true }
image = { version = "0.24", default-features = false, features = ["png", "jpeg", "tiff"] }
imageproc = { version = "0.25", optional = true }
indexmap = "2.2"
inflate = "0.4"
js-sys = { version = "0.3", optional = true }
lazy_static = "1.4"
libflate = "2.1"
linfa = { version = "0.7", optional = true }
linfa-clustering = { version = "0.7", optional = true }
log = "0.4"
lzw = "0.10.0"
md-5 = "0.10"
ndarray = { version = "0.16", features = ["std"], optional = true }
nom = "7.1"
ort = { version = "2.0.0-rc.10", default-features = false, features = ["ndarray", "download-binaries"], optional = true }
pdfium-render = { version = "0.8", optional = true }
phf = { version = "0.11", features = ["macros"] }
pkcs1 = { version = "0.7", optional = true }
pkcs8 = { version = "0.10", optional = true }
pyo3 = { version = "0.27", features = ["extension-module"], optional = true }
qrcode = { version = "0.14", optional = true }
quick-xml = "0.31"
regex = "1.10"
rsa = { version = "0.9", optional = true }
rustybuzz = { version = "0.14", optional = true }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sha1 = { version = "0.10", optional = true }
sha2 = "0.10"
signature = { version = "2.2", optional = true }
spki = { version = "0.7", optional = true }
thiserror = "1.0"
tiff = "0.9"
tiny-skia = { version = "0.11", optional = true }
tokenizers = { version = "0.15", default-features = false, features = ["onig"], optional = true }
tract-onnx = { version = "0.21", optional = true }
ttf-parser = "0.24"
uuid = { version = "1.0", features = ["v4"] }
wasm-bindgen = { version = "0.2", optional = true }
web-sys = { version = "0.3", features = ["console"], optional = true }
weezl = "0.1"
x509-parser = { version = "0.16", optional = true }
zip = { version = "2.2", default-features = false, features = ["deflate"], optional = true }
# Dependencies that are only available to tests, examples, and benchmarks.
[dev-dependencies]
crc32fast = "1.3"
criterion = "0.5"
proptest = "1.4"
# `regex` is also a regular dependency with the same "1.10" requirement, so
# this entry does not add anything new to the dependency graph.
regex = "1.10"
tempfile = "3.10"
# Example programs under examples/. They are declared explicitly because the
# package sets `autoexamples = false`.

[[example]]
name = "create_pdf_from_markdown"
path = "examples/create_pdf_from_markdown.rs"

[[example]]
name = "create_pdf_with_images"
path = "examples/create_pdf_with_images.rs"

[[example]]
name = "debug_page_detection"
path = "examples/debug_page_detection.rs"

[[example]]
name = "debug_xobjects"
path = "examples/debug_xobjects.rs"

[[example]]
name = "debug_zero_chars"
path = "examples/debug_zero_chars.rs"

[[example]]
name = "edit_existing_pdf"
path = "examples/edit_existing_pdf.rs"

[[example]]
name = "edit_text_content"
path = "examples/edit_text_content.rs"

[[example]]
name = "extract_structured"
path = "examples/extract_structured.rs"

[[example]]
name = "extract_text_simple"
path = "examples/extract_text_simple.rs"

[[example]]
name = "inspect_char_codes"
path = "examples/inspect_char_codes.rs"

[[example]]
name = "ocr_scanned_pdf"
path = "examples/ocr_scanned_pdf.rs"

[[example]]
name = "test_pdf"
path = "examples/test_pdf.rs"

[[example]]
name = "test_spacing_fix"
path = "examples/test_spacing_fix.rs"
# Cargo features. `default` is empty, so no optional dependency is compiled
# unless one of the features below is requested explicitly.
# `dep:<name>` enables an optional dependency without exposing a feature of
# the same name; plain names refer to other features in this table.
[features]
default = []
barcodes = ["dep:qrcode", "dep:barcoders"]
# Flag-only features: they enable no dependencies and can only affect
# conditional compilation inside the crate.
debug-span-merging = []
logging = []
gpu = ["dep:ort", "ml"]
ml = [
    "dep:tract-onnx",
    "dep:ndarray",
    "dep:linfa",
    "dep:linfa-clustering",
    "dep:tokenizers",
]
ocr = ["dep:ort", "dep:imageproc", "dep:ndarray"]
office = ["dep:calamine", "dep:zip"]
# NOTE(review): this is the only entry that names its optional dependency
# without the `dep:` prefix, so Cargo also keeps an implicit `pyo3` feature.
# Switching to "dep:pyo3" would remove that feature; confirm nothing relies on
# it before changing.
python = ["pyo3"]
rendering = ["dep:tiny-skia", "dep:fontdb", "dep:rustybuzz"]
signatures = [
    "dep:x509-parser",
    "dep:cms",
    "dep:rsa",
    "dep:der",
    "dep:spki",
    "dep:pkcs1",
    "dep:pkcs8",
    "dep:signature",
    "dep:sha1",
]
table-ml = ["ml", "dep:pdfium-render"]
wasm = [
    "dep:wasm-bindgen",
    "dep:web-sys",
    "dep:js-sys",
    "dep:console_error_panic_hook",
    "dep:getrandom",
]
wasm-ml = ["wasm", "ml"]
# Library target. Two artifact kinds are produced: `rlib` for use from other
# Rust crates and `cdylib` for a C-ABI dynamic library (presumably what the
# Python and WASM bindings load; confirm against the build setup).
[lib]
name = "pdf_oxide"
path = "src/lib.rs"
crate-type = ["cdylib", "rlib"]
# Package metadata: name and version first, remaining keys alphabetical,
# description last.
[package]
name = "pdf_oxide"
version = "0.3.0"
authors = ["Yury Fedoseev <yfedoseev@gmail.com>"]
# Target auto-discovery is switched off for every target kind, so only the
# [lib], [[bin]], [[example]], [[test]], and [[bench]] entries written in this
# manifest are built.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used.
build = false
categories = ["parsing", "text-processing"]
documentation = "https://docs.rs/pdf_oxide"
edition = "2021"
homepage = "https://github.com/yfedoseev/pdf_oxide"
keywords = ["pdf", "text-extraction", "pdf-creation", "pdf-editing", "parser"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/yfedoseev/pdf_oxide"
description = "The Complete PDF Toolkit: extract, create, and edit PDFs. Rust core with bindings for Python, Node, WASM, Go, and more."
# Settings consumed by docs.rs when it builds the documentation: the `docsrs`
# cfg is passed to rustdoc, and the listed crate features are enabled.
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]
features = ["python", "logging", "debug-span-merging"]
# Settings for maturin, the Python packaging tool: `name` is the module name
# and `python-source` points at the directory holding the Python sources
# (relative to this manifest, presumably; verify against the repo layout).
[package.metadata.maturin]
python-source = "python"
name = "pdf_oxide"
# Profile used by `cargo bench`: maximum optimization with link-time
# optimization turned on.
[profile.bench]
opt-level = 3
lto = true
# Release profile: maximum optimization, link-time optimization, and a single
# codegen unit, with the resulting binary stripped.
[profile.release]
opt-level = 3
lto = true
codegen-units = 1
strip = true
# Custom size-focused profile layered on top of `release`.
[profile.release-small]
inherits = "release"
# Overrides relative to `release`: optimize for size and abort on panic
# instead of unwinding.
opt-level = "z"
panic = "abort"
# Restated explicitly even though `release` already sets the same values.
codegen-units = 1
lto = true
strip = true
# Integration test targets under tests/. They are enumerated explicitly
# because the package sets `autotests = false`.
# NOTE(review): none of these declare `required-features`, although some names
# (for example the test_ocr* and test_rendering targets) suggest they exercise
# optional features; confirm the files gate themselves with `cfg`.

[[test]]
name = "test_4byte_character_codes"
path = "tests/test_4byte_character_codes.rs"

[[test]]
name = "test_actualtext_extraction"
path = "tests/test_actualtext_extraction.rs"

[[test]]
name = "test_adaptive_threshold"
path = "tests/test_adaptive_threshold.rs"

[[test]]
name = "test_advanced_cmap_directives"
path = "tests/test_advanced_cmap_directives.rs"

[[test]]
name = "test_advanced_cmap_features"
path = "tests/test_advanced_cmap_features.rs"

[[test]]
name = "test_alice_wonderland"
path = "tests/test_alice_wonderland.rs"

[[test]]
name = "test_annotation_integration"
path = "tests/test_annotation_integration.rs"

[[test]]
name = "test_ccitt_decoder_diagnosis"
path = "tests/test_ccitt_decoder_diagnosis.rs"

[[test]]
name = "test_ccitt_edge_cases"
path = "tests/test_ccitt_edge_cases.rs"

[[test]]
name = "test_ccitt_params_extraction"
path = "tests/test_ccitt_params_extraction.rs"

[[test]]
name = "test_cff_font_support"
path = "tests/test_cff_font_support.rs"

[[test]]
name = "test_character_mapping_fixes"
path = "tests/test_character_mapping_fixes.rs"

[[test]]
name = "test_character_tracking_integration"
path = "tests/test_character_tracking_integration.rs"

[[test]]
name = "test_cid_to_gid_map"
path = "tests/test_cid_to_gid_map.rs"

[[test]]
name = "test_citation_detection"
path = "tests/test_citation_detection.rs"

[[test]]
name = "test_cjk_script_support"
path = "tests/test_cjk_script_support.rs"

[[test]]
name = "test_cmap_caching"
path = "tests/test_cmap_caching.rs"

[[test]]
name = "test_complex_script_support"
path = "tests/test_complex_script_support.rs"

[[test]]
name = "test_compliance"
path = "tests/test_compliance.rs"

[[test]]
name = "test_config_adapter"
path = "tests/test_config_adapter.rs"

[[test]]
name = "test_converters"
path = "tests/test_converters.rs"

[[test]]
name = "test_custom_encoding"
path = "tests/test_custom_encoding.rs"

[[test]]
name = "test_debug_visualization"
path = "tests/test_debug_visualization.rs"

[[test]]
name = "test_decoders"
path = "tests/test_decoders.rs"

[[test]]
name = "test_document_pipeline_integration"
path = "tests/test_document_pipeline_integration.rs"

[[test]]
name = "test_document_structure"
path = "tests/test_document_structure.rs"

[[test]]
name = "test_dom_api"
path = "tests/test_dom_api.rs"

[[test]]
name = "test_editor"
path = "tests/test_editor.rs"

[[test]]
name = "test_email_citation_detection"
path = "tests/test_email_citation_detection.rs"

[[test]]
name = "test_embedded_files"
path = "tests/test_embedded_files.rs"

[[test]]
name = "test_encryption_write"
path = "tests/test_encryption_write.rs"

[[test]]
name = "test_extended_ascii_character_mapping"
path = "tests/test_extended_ascii_character_mapping.rs"

[[test]]
name = "test_form_fields"
path = "tests/test_form_fields.rs"

[[test]]
name = "test_foundation"
path = "tests/test_foundation.rs"

[[test]]
name = "test_full_extraction"
path = "tests/test_full_extraction.rs"

[[test]]
name = "test_hierarchical_integration"
path = "tests/test_hierarchical_integration.rs"

[[test]]
name = "test_high_level_api"
path = "tests/test_high_level_api.rs"

[[test]]
name = "test_hyphenation_integration"
path = "tests/test_hyphenation_integration.rs"

[[test]]
name = "test_image_embedding"
path = "tests/test_image_embedding.rs"

[[test]]
name = "test_image_filters"
path = "tests/test_image_filters.rs"

[[test]]
name = "test_images"
path = "tests/test_images.rs"

[[test]]
name = "test_layout"
path = "tests/test_layout.rs"

[[test]]
name = "test_lazy_cmap_loading"
path = "tests/test_lazy_cmap_loading.rs"

[[test]]
name = "test_ligature_expansion"
path = "tests/test_ligature_expansion.rs"

[[test]]
name = "test_markdown_extraction_quality"
path = "tests/test_markdown_extraction_quality.rs"

[[test]]
name = "test_multibyte_cid_support"
path = "tests/test_multibyte_cid_support.rs"

[[test]]
name = "test_object_resolution"
path = "tests/test_object_resolution.rs"

[[test]]
name = "test_objstm"
path = "tests/test_objstm.rs"

[[test]]
name = "test_ocr"
path = "tests/test_ocr.rs"

[[test]]
name = "test_ocr_inference"
path = "tests/test_ocr_inference.rs"

[[test]]
name = "test_ocr_integration"
path = "tests/test_ocr_integration.rs"

[[test]]
name = "test_ocr_module"
path = "tests/test_ocr_module.rs"

[[test]]
name = "test_ocr_scanned_document"
path = "tests/test_ocr_scanned_document.rs"

[[test]]
name = "test_ocr_with_models"
path = "tests/test_ocr_with_models.rs"

[[test]]
name = "test_optimized_cmap_parsing"
path = "tests/test_optimized_cmap_parsing.rs"

[[test]]
name = "test_outline"
path = "tests/test_outline.rs"

[[test]]
name = "test_page_operations"
path = "tests/test_page_operations.rs"

[[test]]
name = "test_parent_tree_lookup"
path = "tests/test_parent_tree_lookup.rs"

[[test]]
name = "test_pattern_detection"
path = "tests/test_pattern_detection.rs"

[[test]]
name = "test_pdf_ccitt_params"
path = "tests/test_pdf_ccitt_params.rs"

[[test]]
name = "test_pipeline_html_converter"
path = "tests/test_pipeline_html_converter.rs"

[[test]]
name = "test_pipeline_integration"
path = "tests/test_pipeline_integration.rs"

[[test]]
name = "test_pipeline_markdown_converter"
path = "tests/test_pipeline_markdown_converter.rs"

[[test]]
name = "test_predefined_cmap_loading"
path = "tests/test_predefined_cmap_loading.rs"

[[test]]
name = "test_predefined_cmaps"
path = "tests/test_predefined_cmaps.rs"

[[test]]
name = "test_pride_prejudice"
path = "tests/test_pride_prejudice.rs"

[[test]]
name = "test_primary_detection_mode_impl"
path = "tests/test_primary_detection_mode_impl.rs"

[[test]]
name = "test_reading_order_priority"
path = "tests/test_reading_order_priority.rs"

[[test]]
name = "test_rendering"
path = "tests/test_rendering.rs"

[[test]]
name = "test_rtl_script_support"
path = "tests/test_rtl_script_support.rs"

[[test]]
name = "test_search"
path = "tests/test_search.rs"

[[test]]
name = "test_spacing_integration"
path = "tests/test_spacing_integration.rs"

[[test]]
name = "test_spacing_spec_compliant"
path = "tests/test_spacing_spec_compliant.rs"

[[test]]
name = "test_spec_compliance_fallback"
path = "tests/test_spec_compliance_fallback.rs"

[[test]]
name = "test_stream_filters"
path = "tests/test_stream_filters.rs"

[[test]]
name = "test_table_extraction"
path = "tests/test_table_extraction.rs"

[[test]]
name = "test_tables"
path = "tests/test_tables.rs"

[[test]]
name = "test_text_justification_alignment"
path = "tests/test_text_justification_alignment.rs"

[[test]]
name = "test_type0_agl_fallback"
path = "tests/test_type0_agl_fallback.rs"

[[test]]
name = "test_unicode_font_embedding"
path = "tests/test_unicode_font_embedding.rs"

[[test]]
name = "test_whitespace_normalization"
path = "tests/test_whitespace_normalization.rs"

[[test]]
name = "test_word_boundary_character_tracking"
path = "tests/test_word_boundary_character_tracking.rs"

[[test]]
name = "test_word_boundary_detection"
path = "tests/test_word_boundary_detection.rs"

[[test]]
name = "test_word_boundary_integration"
path = "tests/test_word_boundary_integration.rs"

[[test]]
name = "test_word_boundary_mode_branching"
path = "tests/test_word_boundary_mode_branching.rs"

[[test]]
name = "test_word_boundary_mode_config"
path = "tests/test_word_boundary_mode_config.rs"

[[test]]
name = "test_word_boundary_performance"
path = "tests/test_word_boundary_performance.rs"

[[test]]
name = "test_zero_byte_font_handling"
path = "tests/test_zero_byte_font_handling.rs"