arrow2 0.18.0

Unofficial implementation of the Apache Arrow spec in safe Rust
[package]
name = "arrow2"
version = "0.18.0"
license = "Apache-2.0"
description = "Unofficial implementation of Apache Arrow spec in safe Rust"
homepage = "https://github.com/jorgecarleitao/arrow2"
repository = "https://github.com/jorgecarleitao/arrow2"
authors = ["Jorge C. Leitao <jorgecarleitao@gmail.com>", "Apache Arrow <dev@arrow.apache.org>"]
keywords = ["arrow", "analytics"]
edition = "2021"
exclude = ["testing/"]

[lib]
name = "arrow2"
bench = false
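# `bench = false` turns off the libtest bench harness on the library target so
# `cargo bench` only runs the criterion-based [[bench]] targets declared below.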

[dependencies]
foreign_vec = "0.1.0"
either = "1.9"
num-traits = "0.2"
dyn-clone = "1"
bytemuck = { version = "1", features = ["derive"] }
chrono = { version = "0.4", default-features = false, features = ["std"] }

# for decimal i256
ethnum = "1"

# We need to hash values before sending them to a hasher. This
# crate provides a HashMap that assumes pre-hashed values.
hash_hasher = "^2.0.3"
# For SIMD utf8 validation
simdutf8 = "0.1.4"

# A Rust port of SwissTable
hashbrown = { version = "0.14", default-features = false, features = ["ahash"] }

# for timezone support
chrono-tz = { version = "0.8", optional = true }
# To efficiently cast numbers to strings
lexical-core = { version = "0.8", optional = true }

# for csv io
csv = { version = "^1.1", optional = true }
csv-core = { version = "0.1", optional = true }

# for csv async io
csv-async = { version = "^1.1", optional = true }

regex = { version = "1.9", optional = true }
regex-syntax = { version = "0.7", optional = true }
streaming-iterator = { version = "0.1", optional = true }
fallible-streaming-iterator = { version = "0.1", optional = true }

json-deserializer = { version = "0.4.4", optional = true, features = ["preserve_order"] }
indexmap = { version = "^1.6", optional = true }

# used to print columns in a nice columnar format
comfy-table = { version = "6.0", optional = true, default-features = false }

arrow-format = { version = "0.8", optional = true, features = ["ipc"] }

hex = { version = "^0.4", optional = true }

# for IPC compression
lz4 = { version = "1.24", optional = true }
zstd = { version = "0.12", optional = true }

rand = { version = "0.8", optional = true }

itertools = { version = "^0.10", optional = true }

base64 = { version = "0.21.0", optional = true }

# to write to parquet as a stream
futures = { version = "0.3", optional = true }

# to read IPC as a stream
async-stream = { version = "0.3.2", optional = true }

# avro support
avro-schema = { version = "0.3", optional = true }

# ORC support
orc-format = { version = "0.3.0", optional = true }

# Arrow integration tests support
serde = { version = "^1.0", features = ["rc"], optional = true }
serde_derive = { version = "^1.0", optional = true }
serde_json = { version = "^1.0", features = ["preserve_order"], optional = true }

# for division/remainder optimization at runtime
strength_reduce = { version = "0.2", optional = true }

# For instruction multiversioning
multiversion = { version = "0.7.3", optional = true }

# For ODBC support
odbc-api = { version = "0.36", optional = true }

# Faster hashing
ahash = "0.8"

# Support conversion to/from arrow-rs
arrow-buffer = { version = ">=40", optional = true }
arrow-schema = { version = ">=40", optional = true }
arrow-data = { version = ">=40", optional = true }
arrow-array = { version = ">=40", optional = true }

[target.wasm32-unknown-unknown.dependencies]
getrandom = { version = "0.2", features = ["js"] }
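# getrandom has no entropy source on wasm32-unknown-unknown unless its "js"
# feature is enabled, which routes randomness through the browser/Node.js
# crypto APIs via wasm-bindgen.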

# parquet support
[dependencies.parquet2]
version = "0.17"
optional = true
default-features = false
features = ["async"]
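# The table form above is equivalent to the inline form:
#   parquet2 = { version = "0.17", optional = true, default-features = false, features = ["async"] }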

[dev-dependencies]
criterion = "0.4"
flate2 = "1"
doc-comment = "0.3"
crossbeam-channel = "0.5.1"
# used to test async readers
tokio = { version = "1", features = ["macros", "rt", "fs", "io-util"] }
tokio-util = { version = "0.7", features = ["compat"] }
# used for property-based testing
proptest = { version = "1", default-features = false, features = ["std"] }
avro-rs = { version = "0.13", features = ["snappy"] }
# used for randomized (flaky) testing
rand = "0.8"
# used for generating and testing random data samples
sample-arrow2 = "0.1"
sample-std = "0.1"
sample-test = "0.1"

# ugly hack: ensures sample-arrow2's dependency on arrow2 resolves to this local copy
[patch.crates-io]
arrow2 = { path = "." }
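# [patch.crates-io] replaces every arrow2 node in the dependency graph,
# including the one sample-arrow2 depends on transitively, with the crate in
# this directory, so the sampling helpers and the crate under test share one version.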

[package.metadata.docs.rs]
features = ["full"]
rustdoc-args = ["--cfg", "docsrs"]
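# docs.rs builds with the "full" feature set and passes `--cfg docsrs` to
# rustdoc, which lets the sources gate feature-specific documentation; the
# usual idiom (illustrative, not necessarily verbatim from this crate) is:
#   #![cfg_attr(docsrs, feature(doc_cfg))]
#   #[cfg_attr(docsrs, doc(cfg(feature = "io_parquet")))]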

[features]
default = []
full = [
    "arrow",
    "io_odbc",
    "io_csv",
    "io_csv_async",
    "io_json",
    "io_ipc",
    "io_flight",
    "io_ipc_write_async",
    "io_ipc_read_async",
    "io_ipc_compression",
    "io_json_integration",
    "io_print",
    "io_parquet",
    "io_parquet_compression",
    "io_avro",
    "io_orc",
    "io_avro_compression",
    "io_avro_async",
    "regex",
    "regex-syntax",
    "compute",
    # parses timezones used in timestamp conversions
    "chrono-tz",
]
arrow = ["arrow-buffer", "arrow-schema", "arrow-data", "arrow-array"]
io_odbc = ["odbc-api"]
io_csv = ["io_csv_read", "io_csv_write"]
io_csv_async = ["io_csv_read_async"]
io_csv_read = ["csv", "lexical-core"]
io_csv_read_async = ["csv-async", "lexical-core", "futures"]
io_csv_write = ["csv-core", "streaming-iterator", "lexical-core"]
io_json = ["io_json_read", "io_json_write"]
io_json_read = ["json-deserializer", "indexmap", "lexical-core"]
io_json_write = ["streaming-iterator", "fallible-streaming-iterator", "lexical-core"]
io_ipc = ["arrow-format"]
io_ipc_write_async = ["io_ipc", "futures"]
io_ipc_read_async = ["io_ipc", "futures", "async-stream"]
io_ipc_compression = ["lz4", "zstd"]
io_flight = ["io_ipc", "arrow-format/flight-data"]

# base64 + io_ipc because Arrow schemas are stored in Parquet metadata as base64-encoded IPC.
io_parquet = ["parquet2", "io_ipc", "base64", "futures", "streaming-iterator", "fallible-streaming-iterator"]

io_parquet_compression = [
    "io_parquet_zstd",
    "io_parquet_gzip",
    "io_parquet_snappy",
    "io_parquet_lz4",
    "io_parquet_brotli"
]

# sample testing of generated arrow data
io_parquet_sample_test = ["io_parquet"]

# compression backends
io_parquet_zstd = ["parquet2/zstd"]
io_parquet_snappy = ["parquet2/snappy"]
io_parquet_gzip = ["parquet2/gzip"]
io_parquet_lz4_flex = ["parquet2/lz4_flex"]
io_parquet_lz4 = ["parquet2/lz4"]
io_parquet_brotli = ["parquet2/brotli"]

# parquet bloom filter functions
io_parquet_bloom_filter = ["parquet2/bloom_filter"]

io_avro = ["avro-schema", "streaming-iterator"]
io_avro_compression = [
    "avro-schema/compression",
]
io_avro_async = ["avro-schema/async"]

io_orc = ["orc-format"]

# serde + serde_json: (de)serialization of the Arrow integration JSON format
# serde_derive: derive macros for the integration structs
io_json_integration = ["hex", "serde", "serde_derive", "serde_json", "io_ipc"]
io_print = ["comfy-table"]
# The compute kernels. Disabling these features significantly reduces compile time.
compute_aggregate = ["multiversion"]
compute_arithmetics_decimal = ["strength_reduce"]
compute_arithmetics = ["strength_reduce", "compute_arithmetics_decimal"]
compute_bitwise = []
compute_boolean = []
compute_boolean_kleene = []
compute_cast = ["lexical-core", "compute_take"]
compute_comparison = ["compute_take", "compute_boolean"]
compute_concatenate = []
compute_contains = []
compute_filter = []
compute_hash = ["multiversion"]
compute_if_then_else = []
compute_length = []
compute_like = ["regex", "regex-syntax"]
compute_limit = []
compute_merge_sort = ["itertools", "compute_sort"]
compute_nullif = ["compute_comparison"]
compute_partition = ["compute_sort"]
compute_regex_match = ["regex"]
compute_sort = ["compute_take"]
compute_substring = []
compute_take = []
compute_temporal = []
compute_window = ["compute_concatenate"]
compute_utf8 = []
compute = [
    "compute_aggregate",
    "compute_arithmetics",
    "compute_bitwise",
    "compute_boolean",
    "compute_boolean_kleene",
    "compute_cast",
    "compute_comparison",
    "compute_concatenate",
    "compute_contains",
    "compute_filter",
    "compute_hash",
    "compute_if_then_else",
    "compute_length",
    "compute_like",
    "compute_limit",
    "compute_merge_sort",
    "compute_nullif",
    "compute_partition",
    "compute_regex_match",
    "compute_sort",
    "compute_substring",
    "compute_take",
    "compute_temporal",
    "compute_utf8",
    "compute_window"
]
benchmarks = ["rand"]
serde_types = ["serde", "serde_derive"]
simd = []
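# Downstream crates enable only the subsets they need. A hypothetical consumer
# that reads Parquet files and uses the compute kernels would declare:
#   [dependencies]
#   arrow2 = { version = "0.18", features = ["io_parquet", "compute"] }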

[build-dependencies]
rustc_version = "0.4.0"

[package.metadata.cargo-all-features]
allowlist = ["compute", "compute_sort", "compute_hash", "compute_nullif"]
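# cargo-all-features checks/tests the crate once per feature combination; the
# allowlist above limits the combination matrix to these four features. Example
# (requires `cargo install cargo-all-features`):
#   cargo check-all-features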

[[bench]]
name = "take_kernels"
harness = false

[[bench]]
name = "filter_kernels"
harness = false

[[bench]]
name = "cast_kernels"
harness = false

[[bench]]
name = "sort_kernel"
harness = false

[[bench]]
name = "length_kernel"
harness = false

[[bench]]
name = "count_zeros"
harness = false

[[bench]]
name = "growable"
harness = false

[[bench]]
name = "comparison_kernels"
harness = false

[[bench]]
name = "read_parquet"
harness = false

[[bench]]
name = "write_parquet"
harness = false

[[bench]]
name = "aggregate"
harness = false

[[bench]]
name = "write_ipc"
harness = false

[[bench]]
name = "arithmetic_kernels"
harness = false

[[bench]]
name = "bitmap"
harness = false

[[bench]]
name = "concatenate"
harness = false

[[bench]]
name = "bitmap_ops"
harness = false

[[bench]]
name = "write_csv"
harness = false

[[bench]]
name = "hash_kernel"
harness = false

[[bench]]
name = "iter_utf8"
harness = false

[[bench]]
name = "iter_list"
harness = false

[[bench]]
name = "avro_read"
harness = false

[[bench]]
name = "bitwise"
harness = false

[[bench]]
name = "write_json"
harness = false

[[bench]]
name = "read_json"
harness = false

[[bench]]
name = "slices_iterator"
harness = false

[[bench]]
name = "bitmap_assign_ops"
harness = false

[[bench]]
name = "assign_ops"
harness = false
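# Every benchmark target above uses criterion, hence `harness = false` to turn
# off libtest's built-in harness. Run a single target with the features it
# needs, e.g. (feature set illustrative):
#   cargo bench --bench take_kernels --features benchmarks,compute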