# dataprof 0.4.81
#
# High-performance data profiler with ISO 8000/25012 quality metrics for CSV, JSON/JSONL, and Parquet files
# Documentation: https://docs.rs/dataprof
# Benchmark targets. `harness = false` turns off the built-in libtest bench
# harness, so each bench file provides its own entry point (presumably via
# criterion from the dev-dependencies — confirm against the bench sources).
[[bench]]
name = "domain_benchmarks"
path = "benches/domain_benchmarks.rs"
harness = false

[[bench]]
name = "statistical_benchmark"
path = "benches/statistical_benchmark.rs"
harness = false

[[bench]]
name = "unified_benchmarks"
path = "benches/unified_benchmarks.rs"
harness = false

# Command-line executable. Declared explicitly because `autobins = false` is
# set in [package]; the binary is named "dataprof-cli", not after the package.
[[bin]]
name = "dataprof-cli"
path = "src/main.rs"

# Runtime dependencies. Entries marked `optional = true` are only compiled
# when a feature in [features] enables them through `dep:<name>`.
[dependencies]
anyhow = "1.0"
arrow = { version = "57.0", optional = true }
async-trait = { version = "0.1", optional = true }
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.5", features = ["derive"] }
colored = "3.0"
csv = "1.4"
glob = "0.3"
glob-match = "0.2"
handlebars = "6.3"
indicatif = "0.18"
is-terminal = "0.4"
log = "0.4"
memmap2 = "0.9"
num_cpus = "1.16"
parquet = { version = "57.0", features = ["arrow"], optional = true }
pyo3 = { version = "0.27", features = ["extension-module"], optional = true }
pyo3-async-runtimes = { version = "0.27", features = ["tokio-runtime"], optional = true }
rand = { version = "0.9", features = ["small_rng"] }
rand_chacha = "0.9"
rand_distr = "0.5"
rayon = "1.8"
regex = "1.12"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sqlx = { version = "0.8.1", default-features = false, features = ["runtime-tokio-rustls", "chrono", "uuid"], optional = true }
sysinfo = "0.37"
thiserror = "2.0"
tokio = { version = "1.0", features = ["full"], optional = true }
toml = "0.9"
url = { version = "2.5", optional = true }
wide = "0.8"

# Dependencies needed only by tests, benches and examples.
# serde_json is intentionally not repeated here: it is already a non-optional
# runtime dependency with the same version requirement, and runtime
# dependencies are available to tests, benches and examples as well.
[dev-dependencies]
criterion = { version = "0.7", features = ["html_reports"] }
proptest = "1.9"
serial_test = "3.1"
tempfile = "3.10"

# Example programs, listed explicitly because `autoexamples = false` is set
# in [package].
# NOTE(review): no example declares `required-features`, although some names
# (arrow_example, database_examples, parquet_example) match optional features
# — confirm the sources still compile with no features enabled.
[[example]]
name = "arrow_example"
path = "examples/arrow_example.rs"

[[example]]
name = "config_builder_example"
path = "examples/config_builder_example.rs"

[[example]]
name = "database_examples"
path = "examples/database_examples.rs"

[[example]]
name = "iso_compliance"
path = "examples/iso_compliance.rs"

[[example]]
name = "parquet_example"
path = "examples/parquet_example.rs"

# Cargo feature flags. `default` is empty, so every optional integration
# below has to be requested explicitly.
[features]
default = []
minimal = []

# Columnar formats; `parquet` also turns on `arrow`.
arrow = ["dep:arrow"]
parquet = ["arrow", "dep:parquet"]

# Database connectors. `database` pulls in tokio, async-trait and url; each
# backend adds sqlx together with the matching sqlx driver feature.
database = ["dep:tokio", "dep:async-trait", "dep:url"]
postgres = ["database", "dep:sqlx", "sqlx/postgres"]
mysql = ["database", "dep:sqlx", "sqlx/mysql"]
sqlite = ["database", "dep:sqlx", "sqlx/sqlite"]
all-db = ["postgres", "mysql", "sqlite"]
production = ["postgres", "mysql"]

# Python bindings built on pyo3; `python-async` adds the tokio-based async
# runtime bridge.
python = ["dep:pyo3"]
python-async = ["python", "dep:pyo3-async-runtimes", "dep:tokio"]

# Library target, emitted both as a Rust library (rlib) and as a dynamic
# library loadable from other languages (cdylib).
[lib]
name = "dataprof"
path = "src/lib.rs"
crate-type = ["rlib", "cdylib"]

# Crate metadata. `build = false` and the auto* switches disable Cargo's
# build-script and target auto-discovery, so every target is declared
# explicitly elsewhere in this manifest.
[package]
name = "dataprof"
version = "0.4.81"
authors = ["Andrea Bozzo"]
edition = "2021"
license = "MIT"
readme = "README.md"
repository = "https://github.com/AndreaBozzo/dataprof"
homepage = "https://github.com/AndreaBozzo/dataprof"
documentation = "https://docs.rs/dataprof"
keywords = ["data", "analysis", "cli", "data-quality", "parquet"]
categories = ["command-line-utilities", "data-structures"]
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "High-performance data profiler with ISO 8000/25012 quality metrics for CSV, JSON/JSONL, and Parquet files"

# docs.rs builds the documentation with every feature enabled and passes
# `--cfg docsrs` to rustdoc.
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

# Benchmarks reuse the release settings. No `panic` key is set here: Cargo
# ignores `panic` for the bench and test profiles (those builds always
# unwind) and emits a warning when the key is present.
[profile.bench]
inherits = "release"

# CI profile: starts from the test profile, then switches to unoptimised,
# non-incremental builds with a single codegen unit.
[profile.ci]
inherits = "test"
opt-level = 0
debug = 2
codegen-units = 1
incremental = false

# Development builds: unoptimised, full debug info, incremental compilation.
[profile.dev]
opt-level = 0
debug = 2
codegen-units = 256
incremental = true
split-debuginfo = "packed"

# Dependencies (all packages other than workspace members) get light
# optimisation even in dev builds.
[profile.dev.package."*"]
opt-level = 1

# Release builds: full optimisation, thin LTO, one codegen unit.
# NOTE(review): `panic = "abort"` terminates the whole process on a panic
# instead of unwinding. The library is also built as a cdylib and there is a
# `python` feature — confirm that aborting the host process is intended.
[profile.release]
opt-level = 3
lto = "thin"
codegen-units = 1
panic = "abort"

# Test builds: lightly optimised, full debug info, incremental compilation.
[profile.test]
opt-level = 1
debug = 2
codegen-units = 256
incremental = true

# Same light optimisation for dependencies when building tests.
[profile.test.package."*"]
opt-level = 1

# Integration test targets, listed explicitly because `autotests = false` is
# set in [package].
# NOTE(review): no target declares `required-features`, although several
# names (arrow_*, parquet_*, database_*) match optional features — confirm
# the sources build, or are cfg-gated, when those features are disabled.
[[test]]
name = "adaptive_engine_tests"
path = "tests/adaptive_engine_tests.rs"

[[test]]
name = "arrow_integration_test"
path = "tests/arrow_integration_test.rs"

[[test]]
name = "arrow_performance_test"
path = "tests/arrow_performance_test.rs"

[[test]]
name = "cli_basic_tests"
path = "tests/cli_basic_tests.rs"

[[test]]
name = "data_quality_metrics_test"
path = "tests/data_quality_metrics_test.rs"

[[test]]
name = "data_quality_simple"
path = "tests/data_quality_simple.rs"

[[test]]
name = "database_heavy"
path = "tests/database_heavy.rs"

[[test]]
name = "database_integration"
path = "tests/database_integration.rs"

[[test]]
name = "error_handling_simple"
path = "tests/error_handling_simple.rs"

[[test]]
name = "integration_tests"
path = "tests/integration_tests.rs"

[[test]]
name = "iso_compliance_test"
path = "tests/iso_compliance_test.rs"

[[test]]
name = "memory_leak_tests"
path = "tests/memory_leak_tests.rs"

[[test]]
name = "metrics_scientific_validation"
path = "tests/metrics_scientific_validation.rs"

[[test]]
name = "parquet_integration_test"
path = "tests/parquet_integration_test.rs"

[[test]]
name = "security_tests"
path = "tests/security_tests.rs"

[[test]]
name = "v03_comprehensive"
path = "tests/v03_comprehensive.rs"