# Package identity and descriptive metadata.
# Key order: name, version, then alphabetical, with the long description last.
[package]
name = "dataprof"
version = "0.4.78"
authors = ["Andrea Bozzo"]
categories = ["command-line-utilities", "data-structures"]
edition = "2021"
keywords = ["data", "analysis", "cli", "data-quality", "parquet"]
license = "MIT"
repository = "https://github.com/AndreaBozzo/dataprof"
description = "High-performance data profiler with ISO 8000/25012 quality metrics for CSV, JSON/JSONL, and Parquet files"

# Dependencies compiled into every build, in alphabetical order.
[dependencies]
anyhow = "1.0"
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.5", features = ["derive"] }
colored = "3.0"
csv = "1.3"
glob = "0.3"
glob-match = "0.2"
handlebars = "5.1"
indicatif = "0.17"
is-terminal = "0.4"
lazy_static = "1.4"
log = "0.4"
memmap2 = "0.9"
num_cpus = "1.16"
rand = { version = "0.8", features = ["small_rng"] }
rand_chacha = "0.3"
rand_distr = "0.4"
rayon = "1.8"
regex = "1.10"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sysinfo = "0.30"
thiserror = "2.0"
toml = "0.8"
walkdir = "2.5"
wide = "0.7"

# Optional dependencies. Each is compiled only when an entry in [features]
# pulls it in through `dep:<name>`.

# Enabled by the `python` feature.
pyo3 = { version = "0.24.1", features = ["extension-module"], optional = true }

# Enabled by the `database` feature.
async-trait = { version = "0.1", optional = true }
tokio = { version = "1.0", features = ["full"], optional = true }
url = { version = "2.5", optional = true }

# Enabled by the `postgres`, `mysql`, and `sqlite` features, each of which
# also switches on the matching sqlx driver feature. Default sqlx features
# are disabled, so only the features listed here plus that driver are built.
sqlx = { version = "0.8.1", default-features = false, features = ["runtime-tokio-rustls", "chrono", "uuid"], optional = true }

# Enabled by the `arrow` and `parquet` features respectively.
arrow = { version = "56.1", optional = true }
parquet = { version = "56.1", features = ["arrow"], optional = true }

# Tooling used only by tests and benchmarks.
# `lazy_static` and `serde_json` are deliberately not repeated here: they are
# regular [dependencies], which Cargo already makes available to tests,
# benches, and examples, so a second declaration adds nothing.
[dev-dependencies]
assert_matches = "1.5"
criterion = { version = "0.5", features = ["html_reports"] }
fake = { version = "2.9", features = ["derive"] }
proptest = "1.5"
quickcheck = "1.0"
quickcheck_macros = "1.0"
serial_test = "3.1"
tempfile = "3.10"

# Command-line executable. It is named `dataprof-cli`, which differs from the
# library target name `dataprof` declared in [lib].
[[bin]]
name = "dataprof-cli"
path = "src/main.rs"
# Benchmark targets. No `path` is given, so Cargo resolves each one by name
# under `benches/`. `harness = false` turns off the built-in libtest harness,
# which means each benchmark file must supply its own `main`.
# NOTE(review): presumably generated by Criterion (a dev-dependency) — confirm.
[[bench]]
name = "unified_benchmarks"
harness = false
[[bench]]
name = "domain_benchmarks"
harness = false
[[bench]]
name = "statistical_benchmark"
harness = false
# Build profiles. Each per-package override sits directly under the profile
# it modifies.

# Development builds: no optimization of this crate, debug info enabled,
# incremental compilation on.
[profile.dev]
opt-level = 0
debug = true
split-debuginfo = "packed"
incremental = true
codegen-units = 256

# `"*"` matches every package outside the workspace, i.e. the dependencies,
# which are lightly optimized even in dev builds.
[profile.dev.package."*"]
opt-level = 1

# Test builds: light optimization with full debug info.
[profile.test]
opt-level = 1
debug = 2
incremental = true
codegen-units = 256

# Dependencies use the same light optimization level under test.
[profile.test.package."*"]
opt-level = 1

# Release builds: maximum optimization, thin LTO, a single codegen unit, and
# abort-on-panic instead of unwinding.
[profile.release]
opt-level = 3
lto = "thin"
codegen-units = 1
panic = "abort"

# Benchmarks reuse the release settings but switch panic handling back to
# unwinding.
[profile.bench]
inherits = "release"
panic = "unwind"

# Feature flags. Nothing optional is enabled by default.
[features]
default = []
# Currently enables nothing, exactly like `default`.
minimal = []

# Pulls in the optional pyo3 dependency.
python = ["dep:pyo3"]

# Columnar formats. `parquet` implies `arrow`.
arrow = ["dep:arrow"]
parquet = ["arrow", "dep:parquet"]

# Shared async/database plumbing used by every database backend below.
database = ["dep:tokio", "dep:async-trait", "dep:url"]

# Individual backends: each adds sqlx plus the matching sqlx driver feature.
postgres = ["database", "dep:sqlx", "sqlx/postgres"]
mysql = ["database", "dep:sqlx", "sqlx/mysql"]
sqlite = ["database", "dep:sqlx", "sqlx/sqlite"]

# Convenience bundles of the backends above.
production = ["postgres", "mysql"]
all-db = ["postgres", "mysql", "sqlite"]

# Library target. `rlib` lets other Rust code (including the `dataprof-cli`
# binary) link against it; `cdylib` additionally produces a C-ABI shared
# library.
# NOTE(review): the `cdylib` output is presumably what the pyo3-based `python`
# feature loads as an extension module — confirm.
[lib]
name = "dataprof"
crate-type = ["rlib", "cdylib"]