# datafusion-python 52.0.0
#
# Apache DataFusion DataFrame and SQL Query Engine
# Documentation
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# PEP 517 build configuration: the package is built with the maturin backend.
[build-system]
build-backend = "maturin"
requires = [
  "maturin>=1.8.1",
]

# Core package metadata (PEP 621).
[project]
name = "datafusion"
description = "Build and run queries against data"
readme = "README.md"
license = { file = "LICENSE.txt" }
keywords = ["datafusion", "dataframe", "rust", "query-engine"]
requires-python = ">=3.10"
# The version is not written here; it is declared dynamic and supplied by the
# build backend.
dynamic = ["version"]

# Trove classifiers shown on the package index.
classifiers = [
  "Development Status :: 2 - Pre-Alpha",
  "Intended Audience :: Developers",
  "License :: OSI Approved :: Apache Software License",
  "License :: OSI Approved",
  "Operating System :: MacOS",
  "Operating System :: Microsoft :: Windows",
  "Operating System :: POSIX :: Linux",
  "Programming Language :: Python :: 3",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Programming Language :: Python :: 3.13",
  "Programming Language :: Python :: 3.14",
  "Programming Language :: Python",
  "Programming Language :: Rust",
]

# Runtime requirements. The pyarrow floor is higher on Python 3.14 and later,
# and typing-extensions is only pulled in on interpreters older than 3.13.
dependencies = [
  "pyarrow>=16.0.0;python_version<'3.14'",
  "pyarrow>=22.0.0;python_version>='3.14'",
  "typing-extensions;python_version<'3.13'",
]

# Links published with the package metadata. The homepage and documentation
# entries intentionally point at the same site.
[project.urls]
repository = "https://github.com/apache/datafusion-python"
homepage = "https://datafusion.apache.org/python"
documentation = "https://datafusion.apache.org/python"

# isort: use the built-in "black" profile.
[tool.isort]
profile = "black"

# maturin build settings for the Rust extension module.
[tool.maturin]
# Directory that holds the pure-Python part of the package.
python-source = "python"
# Import path under which the compiled extension is installed.
module-name = "datafusion._internal"
# Cargo features to enable when compiling.
features = ["substrait"]
# Require Cargo.lock is up to date
locked = true
# Ship Cargo.lock in the source distribution.
include = [{ path = "Cargo.lock", format = "sdist" }]
# Paths left out of the built distributions.
exclude = [".github/**", "ci/**", ".asf.yaml"]

# pytest-asyncio settings: "auto" mode, with async fixtures defaulting to a
# function-scoped event loop.
[tool.pytest.ini_options]
asyncio_default_fixture_loop_scope = "function"
asyncio_mode = "auto"

# Enable every Ruff lint rule, then opt out of the ones listed below.
# Docstring linting follows the google style guide (see the pydocstyle table).
[tool.ruff.lint]
select = ["ALL"]
ignore = [
  "A001",    # Allow builtin names such as `min` as variable names
  "A002",    # Allow builtin names such as `filter` as argument names
  "A005",    # Allow a module named io
  "ANN401",  # Allow Any for wrapper classes
  "COM812",  # Recommended to ignore when using ruff-format
  "FBT001",  # Allow boolean positional args
  "FBT002",  # Allow boolean positional args with defaults
  "FIX002",  # Allow TODO lines - consider removing at some point
  "ISC001",  # Recommended to ignore when using ruff-format
  "N812",    # Allow importing functions as `F`
  "PD901",   # Allow variable name df
  "PLR0913", # Allow many arguments in function definition
  "SLF001",  # Allow accessing private members
  "TD002",   # Do not require author names in TODO statements
  "TD003",   # Do not require an issue link on TODO lines
]

# Check docstrings against the google convention.
[tool.ruff.lint.pydocstyle]
convention = "google"

# Limit comment and docstring lines to 88 characters.
[tool.ruff.lint.pycodestyle]
max-doc-length = 88

# Calls that may take a positional boolean without tripping the boolean-trap
# rules.
[tool.ruff.lint.flake8-boolean-trap]
extend-allowed-calls = [
  "lit",
  "datafusion.lit",
]

# Relax selected rule families (docstrings, annotations, security checks, ...)
# for code that is not part of the published library.
# Only rules that still exist in the minimum supported Ruff version belong
# here; Ruff warns when a removed rule code is ignored.
[tool.ruff.lint.per-file-ignores]
"python/tests/*" = [
  "ANN",
  "ARG",
  "BLE001",
  "D",
  "S101",
  "SLF",
  "PD",
  "PLR2004",
  "PT011",
  "RUF015",
  "S608",
  "PLR0913",
]
"examples/*" = [
  "D",
  "W505",
  "E501",
  "T201",
  "S101",
  "PLR2004",
  "ANN001",
  "ANN202",
  "INP001",
  "DTZ007",
  "RUF015",
]
"dev/*" = [
  "D",
  "E",
  "T",
  "S",
  "PLR",
  "C",
  "SIM",
  "UP",
  "EXE",
  "N817",
  "ERA001",
  "ANN001",
]
"benchmarks/*" = [
  "D",
  "F",
  "T",
  "BLE",
  "FURB",
  "PLR",
  "E",
  "TD",
  "TRY",
  "S",
  "SIM",
  "EXE",
  "UP",
  "ERA001",
  "ANN001",
  "INP001",
]
"docs/*" = ["D"]
"docs/source/conf.py" = ["ERA001", "ANN001", "INP001"]

# codespell: paths to skip and words that must not be reported as typos.
[tool.codespell]
count = true
ignore-words-list = ["ans", "IST"]
skip = [
  "./target",
  "uv.lock",
  "./python/tests/test_functions.py",
]

# Development-only dependency groups (PEP 735); these are not part of the
# published package requirements.
[dependency-groups]
# Tooling for building, linting and testing. The numpy requirement is split by
# interpreter version: a higher floor applies on Python 3.14 and later.
dev = [
  "arro3-core==0.6.5",
  "codespell==2.4.1",
  "maturin>=1.8.1",
  "nanoarrow==0.8.0",
  "numpy>1.25.0;python_version<'3.14'",
  "numpy>=2.3.2;python_version>='3.14'",
  "pre-commit>=4.3.0",
  "pyarrow>=19.0.0",
  "pygithub==2.5.0",
  "pytest>=7.4.4",
  "pytest-asyncio>=0.23.3",
  "pyyaml>=6.0.3",
  "ruff>=0.9.1",
  "toml>=0.10.2",
]
# Packages needed to build the Sphinx documentation.
docs = [
  "ipython>=8.12.3",
  "jinja2>=3.1.5",
  "myst-parser>=3.0.1",
  "pandas>=2.0.3",
  "pickleshare>=0.7.5",
  "pydata-sphinx-theme==0.8.0",
  "setuptools>=75.3.0",
  "sphinx>=7.1.2",
  "sphinx-autoapi>=3.4.0",
]