# pdf_oxide 0.2.4
#
# Production-grade PDF parsing: spec-compliant text extraction, intelligent reading order, OCR support. Ultra-fast Rust performance.
# Documentation
# Build backend declaration. maturin is the backend and is constrained
# to the 1.x series (>=1.0,<2.0).
[build-system]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"

# Core package metadata.
[project]
name = "pdf_oxide"
version = "0.2.4"
description = "Production-grade PDF parsing: spec-compliant text extraction, intelligent reading order, OCR support. Ultra-fast Rust performance."
readme = "README.md"
requires-python = ">=3.8"
# Dual license expression; the two "License ::" classifiers below mirror it.
license = { text = "MIT OR Apache-2.0" }
authors = [{ name = "PDF Oxide Contributors", email = "yf@yfedoseev.com" }]
keywords = [
    "pdf",
    "text-extraction",
    "ocr",
    "rag",
    "llm",
    "markdown",
    "document-parser",
]
classifiers = [
    # NOTE(review): the description says "Production-grade" while the status
    # classifier says Alpha; confirm which one is intended.
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "License :: OSI Approved :: Apache Software License",
    # Listed Python versions start at 3.8, the requires-python lower bound.
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Rust",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Text Processing :: Markup :: HTML",
    "Topic :: Text Processing :: Markup :: Markdown",
]

# Project links. Documentation points at docs.rs; the other three entries
# point at the GitHub repository.
[project.urls]
Homepage = "https://github.com/yfedoseev/pdf_oxide"
Documentation = "https://docs.rs/pdf_oxide"
Repository = "https://github.com/yfedoseev/pdf_oxide"
# Quoted because the key contains a space.
"Bug Tracker" = "https://github.com/yfedoseev/pdf_oxide/issues"

# Optional extras. The `test` extra adds pytest (7.0 or newer).
[project.optional-dependencies]
test = ["pytest>=7.0"]

# maturin build settings.
[tool.maturin]
# Cargo features enabled when building the extension.
features = ["python", "pyo3/extension-module"]
# NOTE(review): presumably the directory holding the pure-Python package
# sources; confirm against the repository layout.
python-source = "python"
# Dotted name of the compiled module: a `pdf_oxide` submodule inside the
# `pdf_oxide` package.
module-name = "pdf_oxide.pdf_oxide"

# pytest configuration: where to look for tests and which names to collect.
[tool.pytest.ini_options]
# Directories searched when pytest is run without explicit paths.
testpaths = ["tests"]
# Glob patterns for test modules, test classes, and test functions.
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

# Ruff: Python linter and formatter, implemented in Rust.
# Stands in for Black, isort, Flake8, pyupgrade, and others.
# Reference: https://docs.astral.sh/ruff/
[tool.ruff]
target-version = "py38"  # matches the requires-python lower bound
line-length = 100
# Extra paths to skip, on top of Ruff's built-in exclusions.
extend-exclude = [".git", "__pycache__", "build", "dist", ".eggs", "*.egg-info"]

[tool.ruff.lint]
# Rule families that are switched on.
select = [
    "E",    # pycodestyle: errors
    "W",    # pycodestyle: warnings
    "F",    # pyflakes
    "I",    # isort: import ordering
    "N",    # pep8-naming
    "UP",   # pyupgrade: modernize syntax
    "B",    # flake8-bugbear: likely bugs
    "C4",   # flake8-comprehensions
    "SIM",  # flake8-simplify
    "TCH",  # flake8-type-checking
    "RUF",  # rules specific to Ruff
]

# Individual rules switched off project-wide.
ignore = [
    "E501",  # line length is left to the formatter
    "B008",  # function calls in argument defaults are permitted
]

# Names made of underscores only, or starting with an underscore, count as
# intentionally unused. Literal string: the regex needs no TOML escaping.
dummy-variable-rgx = '^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$'

[tool.ruff.lint.per-file-ignores]
# Ignore unused-import (F401) and star-import (F403) violations in
# __init__.py files.
"__init__.py" = ["F401", "F403"]
# Allow print statements in scripts.
# NOTE(review): T201 is a flake8-print (T20) rule, and no "T" family is listed
# in `select` under [tool.ruff.lint] in this file, so this entry looks like a
# no-op today. Confirm whether T20 was meant to be enabled.
"scripts/*.py" = ["T201"]

# Import-sorting settings for the "I" rules.
[tool.ruff.lint.isort]
# Treat imports of pdf_oxide as first-party.
known-first-party = ["pdf_oxide"]
# Do not force each imported name onto its own line.
force-single-line = false
# Number of blank lines placed after the import block.
lines-after-imports = 2

# Formatter settings.
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
# false: existing trailing commas are respected rather than ignored.
skip-magic-trailing-comma = false
line-ending = "lf"