# ---------------------------------------------------------------------------
# datapress configuration template.
#
# Copy this file to `datasets.toml` and edit it. The `datapress` CLI looks
# for its config in this order:
#
# 1. $DATAPRESS_CONFIG_FILE (explicit path; wins if set)
# 2. ./datasets.toml (current working directory)
# 3. $HOME/datasets.toml (per-user default)
#
# Every running instance reads the resolved file at startup and registers
# each [[dataset]] entry as a queryable table.
# ---------------------------------------------------------------------------
# Server settings: which query engine backs the instance and where it listens.
[server]
backend = "datafusion" # Query engine: "datafusion" or "duckdb".
listen = "127.0.0.1" # Loopback by default; set "0.0.0.0" to expose the port.
port = 8080
# workers = 8 # actix worker threads; omit for "one per CPU".
# Experimental Quack protocol: enables DuckDB client/server connections.
# It may not be faster than the Arrow IPC query API, but it does provide a
# feature-complete SQL context for interactive end-user sessions.
# [server.quack] # DuckDB backend only
# enabled = true
# uri = "quack:localhost:9494" # default port 9494; use the literal hostname "localhost" (CORS)
# allow_other_hostname = true # set to true for other hosts, e.g. quack:0.0.0.0:9494 behind a TLS proxy
# read_only = true
# token = "analytics-token" # set a token the client needs to provide
# Embedded Swagger UI (requires a binary built with the `swagger` feature).
# [swagger]
# enabled = true
# path = "/docs"
# Optional raw SQL endpoint
# [sql]
# enabled = true # default false — endpoint returns 404 when off
# max_rows = 100000 # hard cap on rows returned by one query
# Prometheus metrics (requires the `metrics` feature).
# [metrics]
# enabled = true
# path = "/metrics"
# Embedded dataset explorer UI — Discovery view + in-browser DuckDB console
# (requires a binary built with the `explorer` feature).
# [explorer]
# enabled = true
# path = "/explore"
# -- Local parquet file -----------------------------------------------------
# Each [[dataset]] entry is registered as a queryable table at startup.
[[dataset]]
name = "example"
# Where the data lives: the source format and its location.
[dataset.source]
kind = "parquet"
# NOTE(review): relative path — presumably resolved against the working
# directory; confirm.
location = "data/example.parquet"
# Index policy (DataFusion only; DuckDB ignores it):
# mode = "auto" (default) | "none" | "list"
# [dataset.index]
# mode = "list"
# columns = ["state", "category"]
# -- S3 parquet (AWS) -------------------------------------------------------
# [[dataset]]
# name = "events"
#
# [dataset.source]
# kind = "parquet"
# location = "s3://my-bucket/events/*.parquet"
#
# [dataset.s3]
# region = "eu-west-3"
# # endpoint = "https://s3.eu-west-3.amazonaws.com" # custom/MinIO/R2
# addressing_style = "virtual" # or "path"
# # allow_http = true # non-https endpoints
#
# # Credentials precedence (highest -> lowest):
# # 1. ${PREFIX}_AWS_ACCESS_KEY_ID / ${PREFIX}_AWS_SECRET_ACCESS_KEY /
# #    ${PREFIX}_AWS_SESSION_TOKEN, where PREFIX is the dataset name
# #    uppercased with every non-alphanumeric character replaced by '_'
# #    (e.g. "events" -> EVENTS_AWS_ACCESS_KEY_ID).
# # 2. Inline access_key_id / secret_access_key below (dev only).
# # 3. Plain AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_SESSION_TOKEN.
# # 4. The engine's default chain (~/.aws/credentials, IMDS).
# # access_key_id = "..."
# # secret_access_key = "..."