# Crate identity and publishing metadata for `dataprof`.
[package]
name = "dataprof"
version = "0.6.2"
edition = "2024"
authors = ["Andrea Bozzo"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/AndreaBozzo/dataprof"
homepage = "https://github.com/AndreaBozzo/dataprof"
documentation = "https://docs.rs/dataprof"
keywords = ["data", "analysis", "cli", "data-quality", "parquet"]
categories = ["command-line-utilities", "data-structures"]
# Paths left out of the packaged crate.
exclude = [
    "assets/animations/*",
    "assets/images/*.png",
    "examples/*",
    ".venv/*",
    ".mypy_cache/*",
    "python/*",
    ".github/*",
]
# No build script, and automatic target discovery is switched off: the only
# targets are the lib/bin/test/bench entries declared explicitly in this file.
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "High-performance data profiler with ISO 8000/25012 quality metrics for CSV, JSON/JSONL, and Parquet files"
# docs.rs build settings: document every feature and pass `--cfg docsrs`
# to rustdoc.
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]
all-features = true
# Feature flags. `dep:<name>` entries switch on an optional dependency,
# `<crate>/<feature>` entries switch on a feature of a dependency, and bare
# names refer to other features in this table.
[features]
default = ["cli"]
minimal = []
# Command-line front end.
cli = [
    "dep:clap",
    "dep:colored",
    "dep:is-terminal",
    "dep:indicatif",
    "dep:env_logger",
]
full-cli = ["cli", "all-db"]
# Async I/O and query engine.
async-streaming = [
    "dep:tokio",
    "dep:async-trait",
    "dep:bytes",
    "dep:tokio-util",
]
parquet-async = ["parquet/async", "dep:reqwest", "async-streaming"]
datafusion = ["dep:datafusion", "dep:tokio"]
# Database connectors: `database` is the shared base, each backend adds sqlx
# with the matching driver feature.
database = ["dep:tokio", "dep:async-trait", "dep:url"]
postgres = ["database", "dep:sqlx", "sqlx/postgres"]
mysql = ["database", "dep:sqlx", "sqlx/mysql"]
sqlite = ["database", "dep:sqlx", "sqlx/sqlite"]
all-db = ["postgres", "mysql", "sqlite"]
production = ["postgres", "mysql"]
# Python bindings.
python = ["dep:pyo3"]
python-async = ["python", "dep:pyo3-async-runtimes", "dep:tokio"]
# Library target: built from src/lib.rs as a Rust `rlib` only.
# NOTE(review): the `python` feature enables pyo3, and a loadable Python
# extension normally needs a `cdylib` crate type - confirm that is supplied
# elsewhere (e.g. by the excluded `python/` tree or the build tooling).
[lib]
name = "dataprof"
path = "src/lib.rs"
crate-type = ["rlib"]
# Command-line executable. `required-features` makes Cargo build this target
# only when the `cli` feature is enabled (it is part of `default`).
[[bin]]
name = "dataprof-cli"
path = "src/main.rs"
required-features = ["cli"]
# Integration test targets, declared explicitly because `autotests = false`.
# NOTE(review): none of these sets `required-features`, so all three are
# compiled under any feature selection. If `async_streaming` or
# `database_integration` rely on the `async-streaming` / `database` features,
# confirm the test files gate themselves with `cfg(feature = ...)`.
[[test]]
name = "async_streaming"
path = "tests/async_streaming.rs"
[[test]]
name = "cross_engine_consistency"
path = "tests/cross_engine_consistency.rs"
[[test]]
name = "database_integration"
path = "tests/database_integration.rs"
# Benchmark target. `harness = false` turns off the built-in libtest harness,
# so benches/benchmarks.rs must provide its own entry point (presumably via
# the `criterion` dev-dependency - confirm).
[[bench]]
name = "benchmarks"
path = "benches/benchmarks.rs"
harness = false
# Runtime dependencies, one inline table per crate, sorted alphabetically.
# Entries marked `optional = true` are only compiled when a feature in
# `[features]` enables them through `dep:<name>`.
[dependencies]
anyhow = { version = "1.0" }
arrow = { version = "57.3.0", features = ["ffi"] }
async-trait = { version = "0.1", optional = true }
bytes = { version = "1.10", optional = true }
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.5.54", features = ["derive"], optional = true }
colored = { version = "3.0", optional = true }
csv = { version = "1.4" }
datafusion = { version = "52.2.0", optional = true }
env_logger = { version = "0.11", optional = true }
futures = { version = "0.3.31" }
glob = { version = "0.3" }
indicatif = { version = "0.18", optional = true }
is-terminal = { version = "0.4", optional = true }
log = { version = "0.4" }
memmap2 = { version = "0.9" }
num_cpus = { version = "1.16" }
parquet = { version = "57.3.0", features = ["arrow"] }
# NOTE(review): `extension-module` is requested whenever pyo3 is enabled;
# confirm this does not interfere with linking tests or binaries that are
# built with the `python` feature on.
pyo3 = { version = "0.27", features = ["extension-module"], optional = true }
pyo3-async-runtimes = { version = "0.27", features = ["tokio-runtime"], optional = true }
rand = { version = "0.9", features = ["small_rng"] }
rand_chacha = { version = "0.9" }
rand_distr = { version = "0.5" }
rayon = { version = "1.8" }
regex = { version = "1.12" }
# Default features are off; only the rustls TLS backend and streaming bodies
# are requested.
reqwest = { version = "0.12", features = ["rustls-tls", "stream"], optional = true, default-features = false }
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0.149" }
# Default features are off; the database driver itself is selected by the
# `postgres` / `mysql` / `sqlite` crate features.
sqlx = { version = "0.8.1", features = ["runtime-tokio-rustls", "chrono", "uuid"], optional = true, default-features = false }
sysinfo = { version = "0.38" }
thiserror = { version = "2.0" }
tokio = { version = "1.49", features = ["full"], optional = true }
tokio-util = { version = "0.7", features = ["io-util"], optional = true }
toml = { version = "0.9.8" }
url = { version = "2.5.8", optional = true }
uuid = { version = "1.0", features = ["v4"] }
wide = { version = "1.1.1" }
# Dependencies used only by tests and benchmarks.
#
# `anyhow` and `serde_json` are intentionally not repeated here: both are
# unconditional entries in the normal dependency list, which test and bench
# targets already see, and a second declaration only invites version drift.
# `env_logger` IS repeated because its normal-dependency entry is optional
# (enabled by the `cli` feature), so tests need it unconditionally.
[dev-dependencies]
criterion = { version = "0.8", features = ["html_reports"] }
env_logger = { version = "0.11" }
predicates = { version = "3.1" }
proptest = { version = "1.9" }
serial_test = { version = "3.4.0" }
tempfile = { version = "3.24" }
# Lint levels applied to this package: every lint in Clippy's `all` group is
# a hard error, and any use of `unsafe` produces a compiler warning.
# If individual Clippy lints are ever configured next to the `all` group,
# give the group a lower `priority` so the specific entries take precedence.
[lints]
clippy.all = "deny"
rust.unsafe_code = "warn"
# Custom profile, selected with `--profile ci`. It starts from the `test`
# profile and then turns off optimization and incremental compilation and
# uses a single codegen unit, keeping full debug info.
[profile.ci]
inherits = "test"
opt-level = 0
debug = 2
incremental = false
codegen-units = 1
# Development profile: unoptimized, full debug info, incremental builds with
# many codegen units.
[profile.dev]
opt-level = 0
debug = 2
split-debuginfo = "packed"
incremental = true
codegen-units = 256
# Dependencies (every package outside the workspace) are compiled with light
# optimization even though this crate itself is built at opt-level 0.
package."*".opt-level = 1
# Release profile: whole-program LTO with a single codegen unit, stripped
# output, and panics abort the process instead of unwinding.
# NOTE(review): with `panic = "abort"` a Rust panic cannot be caught or
# converted into an error; confirm that is acceptable for artifacts built
# with the `python` feature.
[profile.release]
codegen-units = 1
lto = true
strip = true
panic = "abort"
# Named alias of `release`: it inherits every setting above and overrides
# nothing, so `--profile release-lto` currently builds the same way.
[profile.release-lto]
inherits = "release"
# Test profile: light optimization with full debug info and incremental
# builds.
[profile.test]
opt-level = 1
debug = 2
incremental = true
codegen-units = 256
# Dependencies (every package outside the workspace) use the same light
# optimization level.
package."*".opt-level = 1