pdf_oxide 0.3.13

The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs.
Documentation
# Rust
/target/
**/*.rs.bk
*.pdb

# Cargo.lock - commit it for binaries, ignore it for libraries.
# It is currently ignored (library convention); comment out the next line to track it:
/Cargo.lock

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
.pytest_cache/
.coverage
.coverage.*
htmlcov/
.tox/
.nox/
.hypothesis/
*.cover
*.log
.mypy_cache/
.dmypy.json
dmypy.json
.pyre/

# Virtual environments
venv/
env/
ENV/
env.bak/
venv.bak/
.venv/
*_venv/
venv_*/

# PyO3/Maturin
*.whl
python/pdf_library.egg-info/
python/pdf_oxide.egg-info/

# WASM
pkg/
wasm-pack.log
# Generated wasm-bindgen output (build artifacts, not source)
examples/wasm_node/*.wasm
examples/wasm_node/*.js
examples/wasm_node/*.d.ts

# IDEs
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
Thumbs.db
*.code-workspace

# JetBrains IDEs
.idea/
*.iml
*.ipr
*.iws
.idea_modules/

# Vim
[._]*.s[a-v][a-z]
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]

# Emacs
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*

# OS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
Desktop.ini

# ML Models (large files - use Git LFS if needed)
models/*.onnx
models/*.pt
models/*.pth
models/*.h5
models/*.pb
# OCR model files
.models/
# ONNX Runtime cache
.ort/
.models/**/*.onnx
.models/**/*.tar
.models/**/inference.*
training/output/
training/data/raw/

# Keep model registry and documentation
!models/registry.json
!models/README.md

# Test fixtures (PDFs can be large)
tests/fixtures/large/
tests/fixtures/real/*.pdf

# Keep small test PDFs for CI
!tests/fixtures/simple.pdf
!tests/fixtures/hello_world.pdf

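# Test datasets (exclude all downloaded PDFs for validation)
# Ignore the directory's contents rather than the directory itself: git cannot
# re-include a file whose parent directory is excluded, so the negations below
# would otherwise have no effect.
test_datasets/*
!test_datasets/README.md
!test_datasets/SOURCES.md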

# Benchmarking results
target/criterion/
benches/results/
benchmark_results/
benchmark_venv/
*.profdata
*.profraw

# Documentation builds
target/doc/
book/

# Temporary files
*.tmp
*.temp
tmp/
temp/

# Logs
*.log
logs/

# Environment files
.env
.env.local
.env.*.local

# Docker
.dockerignore

# CI/CD
.gitlab-ci-local/

# Coverage reports
tarpaulin-report.html
cobertura.xml
lcov.info

# Lock files for package managers
package-lock.json
yarn.lock
pnpm-lock.yaml

# Node modules (for WASM examples)
node_modules/

# Build artifacts
*.o
*.a
*.lib
*.dll
*.dylib

# Profiling
flamegraph.svg
perf.data
perf.data.old

# Label Studio exports (large annotation files)
training/dataset/*.json
!training/dataset/schema.json

# Jupyter notebooks checkpoints
.ipynb_checkpoints/

# Training checkpoints
checkpoints/

# Markdown exports (generated content for comparison)
markdown_exports/

# Export directories (generated data)
html_exports/
text_exports/
pdf_exports/

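# Batch extraction output directories (generated by scripts/batch_extract_pdfs.sh)
# NOTE: the leading slash anchors this pattern to the repository root, so it
# matches <repo>/tmp/pdf_extraction_*/ and not the system-wide /tmp directory.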
/tmp/pdf_extraction_*/

# Claude Code
Claude.md
CLAUDE.md

# Stray stdout captures
stdout

# Debug/profiling example scripts (local development only)
examples/bench_*.rs
examples/debug_*.rs
examples/profile_*.rs
examples/deep_profile.rs
examples/dump_content.rs
examples/perf_target.rs
examples/phase_bench.rs
examples/space_spans.rs
examples/span_count.rs
examples/span_count2.rs
examples/content_stream_sizes.rs
examples/bottleneck_analysis.rs
examples/verify_text.rs
examples/xobject_investigation.rs
examples/test_issue*.rs
examples/test_irs_*.rs
examples/diagnose_*.rs
examples/trace_slow.rs
examples/verify_corpus.rs
examples/diagnose_slow_images.rs
examples/analyze_tables.rs
examples/inspect_char_codes.rs
examples/test_spacing_fix.rs
examples/test_pdf.rs

# Root-level temporary PDFs (local dev artifacts)
/*.pdf

# Generated planning and analysis documents (auto-generated during work)
PHASE_*.md
PLAN_*.md
CLEANUP_ROADMAP.md
WORD_BOUNDARY_ANALYSIS.md

# Temporary working documents (NOT for public repo)
COMPLETION_CHECKLIST.md
DEBUG_FINDINGS.md
DELIVERABLES_FIX_*.md
EXAMPLES_OF_ISSUES.md
EXECUTE_VALIDATION.txt
EXECUTIVE_SOLUTION_SUMMARY.md
GOLDEN_FILES_QUICK_START.md
GOLDEN_FILES_README.md
OCR_INTEGRATION_NOTES.md
PHASE3_*.md
QUALITY_REPORT.md
QUICK_START_FIX_*.md
VERIFICATION_REPORT_*.md
VISUAL_CHANGES_FIX_*.txt
INTELLIGENT_PROCESSOR_INTEGRATION.md
TEXTSOURCE_NAMING_RATIONALE.md

# Temporary directories
batch_analysis/
rust_out/
scanned_samples/

# Temporary test outputs
*_enhanced.md
*_formatted.md
*_unformatted.md

# Temporary analysis files
*.backup
*.bak
*.tmp_analysis

# Golden files for regression testing (generated, not committed)
tests/golden_files/

# Claude Code AI assistant configuration (internal development tool)
.claude/


# pdm-python
.pdm-python

# uv-python
.python-version

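# rust_out (no trailing slash, so this also matches a plain file of that name)
# and local working directories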
rust_out
scripts/workdir/
docs/workdir/
docs/planning/

# Fossil static analysis cache
.fossil/

# Local working PDFs and verification results
workdir_pdfs/
verifications/