# rust-data-processing 0.1.4
#
# Schema-first ingestion (CSV, JSON, Parquet, Excel) into an in-memory DataSet, plus Polars-backed pipelines, SQL, profiling, validation, and map/reduce-style processing.

# Crate identity and publishing metadata.
# Key order: name, version, remaining keys alphabetically, description last.
[package]
name = "rust-data-processing"
version = "0.1.4"
categories = ["database-implementations", "parser-implementations"]
documentation = "https://docs.rs/rust-data-processing"
# Edition 2024 is only available from Rust 1.85 onward; keep in step with `rust-version`.
edition = "2024"
# Paths left out of the packaged crate.
exclude = [
    ".github/",
    "Planning/",
    # The repository-level README covers both Python and Rust; the crate ships
    # README_CRATE.md instead (see `readme`).
    "README.md",
    "docs/landing/",
    "docs/python/",
    "python-wrapper/",
    "scripts/",
    "test_run.log",
]
keywords = ["csv", "json", "parquet", "dataset", "etl"]
license = "MIT OR Apache-2.0"
readme = "README_CRATE.md"
repository = "https://github.com/vihangdesai2018-png/rust-data-processing"
rust-version = "1.85"
description = "Schema-first ingestion (CSV, JSON, Parquet, Excel) into an in-memory DataSet, plus Polars-backed pipelines, SQL, profiling, validation, and map/reduce-style processing."

# Runtime dependencies, sorted alphabetically.
# Entries marked `optional = true` are only compiled when a feature in
# [features] enables them through `dep:<name>`.
[dependencies]
# Major version must match the `arrow-54` feature selected on `serde_arrow`.
# NOTE(review): `parquet` is on major 57 while `arrow` is pinned to 54 — confirm
# that no Arrow types are passed between the two crates.
arrow = { version = "54", optional = true }
calamine = { version = "0.33.0", optional = true }
connectorx = { version = "0.4.5", default-features = false, features = ["dst_arrow", "fptr"], optional = true }
csv = "1.3.1"
parquet = "57"
polars = { version = "0.53.0", features = ["lazy", "csv", "parquet", "json", "fmt"] }
# Kept on the same version as `polars`.
polars-sql = { version = "0.53.0", optional = true }
rayon = "1"
rust_xlsxwriter = { version = "0.93", optional = true }
serde = { version = "1", features = ["derive"] }
serde_arrow = { version = "0.14.0", default-features = false, features = ["arrow-54"], optional = true }
serde_json = "1.0.139"
thiserror = "2.0.11"

[features]
# Only SQL support is on by default, which keeps the default dependency
# surface small. Every connector below has to be requested explicitly.
default = ["sql"]
# SQL support (adds `polars-sql`).
sql = ["dep:polars-sql"]
# Excel ingestion (adds `calamine`).
excel = ["dep:calamine"]
# Excel integration tests / benchmarks that build an `.xlsx` at runtime
# (adds `rust_xlsxwriter` on top of `excel`).
excel_test_writer = ["excel", "dep:rust_xlsxwriter"]
# Arrow interop (adds the `arrow` crate).
arrow = ["dep:arrow"]
# serde <-> Arrow conversion (adds `serde_arrow`; implies `arrow`).
serde_arrow = ["arrow", "dep:serde_arrow"]
# Database ingestion through ConnectorX with the Postgres, MySQL, MSSQL and
# Oracle sources switched on (implies `arrow`).
db_connectorx = [
    "arrow",
    "dep:connectorx",
    "connectorx/src_postgres",
    "connectorx/src_mysql",
    "connectorx/src_mssql",
    "connectorx/src_oracle",
]
# Marker feature with no dependencies of its own.
# NOTE(review): presumably gates the slower test suites via `cfg(feature)` — confirm.
deep_tests = []
# Mirrors the "expanded" job in `rust_ci.yml`: deep tests, Arrow/serde_arrow and
# the Excel writer. `db_connectorx` is deliberately left out because ConnectorX
# pulls in OpenSSL, which needs Perl or a system SSL install on Windows.
ci_expanded = [
    "deep_tests",
    "excel_test_writer",
    "arrow",
    "serde_arrow",
]

# Dependencies compiled only for tests, examples and benchmarks.
[dev-dependencies]
# Benchmark framework. NOTE(review): presumably drives the `[[bench]]` targets
# declared in this manifest, which all set `harness = false` — confirm.
criterion = "0.5"

# Benchmark targets. No `path` is given, so Cargo resolves each one by name
# under `benches/`. Every target sets `harness = false`, which turns off the
# built-in libtest bench harness so the benchmark file supplies its own `main`.

# Pipeline benchmarks.
[[bench]]
name = "pipelines"
harness = false

# Ingestion benchmarks.
[[bench]]
name = "ingestion"
harness = false

# Map/reduce benchmarks.
[[bench]]
name = "map_reduce"
harness = false

# Profiling benchmarks.
[[bench]]
name = "profiling"
harness = false

# Validation benchmarks.
[[bench]]
name = "validation"
harness = false

# Outlier benchmarks.
[[bench]]
name = "outliers"
harness = false

# Build tests without debug info. This lowers linker memory use on Windows/MSVC,
# where linking integration tests against large dependency graphs such as Polars
# can otherwise fail with LNK1102. The setting is unconditional, so it applies
# to `cargo test` on every platform.
[profile.test]
debug = false