clickhouse_arrow/
lib.rs

1//! # 🛰️ `ClickHouse` *Native Protocol* Rust Client w/ Arrow Compatibility
2//!
3//! `ClickHouse` access in Rust over `ClickHouse`'s native protocol.
4//!
5//! A high-performance, async Rust client for `ClickHouse` with native Arrow integration. Designed
6//! to be faster and more memory-efficient than existing alternatives.
7//!
8//! ## Why clickhouse-arrow?
9//!
10//! - **🚀 Performance**: Optimized for speed with zero-copy deserialization where possible
11//! - **🎯 Arrow Native**: First-class Apache Arrow support for efficient data interchange
12//! - **📊 90%+ Test Coverage**: Comprehensive test suite ensuring reliability
13//! - **🔄 Async/Await**: Modern async API built on Tokio
14//! - **🗜️ Compression**: LZ4 and ZSTD support for efficient data transfer
15//! - **☁️ Cloud Ready**: Full `ClickHouse` Cloud compatibility
16//! - **🛡️ Type Safe**: Compile-time type checking with the `#[derive(Row)]` macro
17//!
18//! ## Details
19//!
20//! The crate supports two "modes" of operation:
21//!
22//! ### `ArrowFormat`
23//!
24//! Provides interoperability with [arrow](https://docs.rs/arrow/latest/arrow/).
25//!
26//! ### `NativeFormat`
27//!
28//! Uses internal types and custom traits if a dependency on arrow is not required.
29//!
30//! ### `CreateOptions`, `SchemaConversions`, and Schemas
31//!
32//! #### Creating Tables from Arrow Schemas
33//!
34//! `clickhouse-arrow` provides powerful DDL capabilities through `CreateOptions`, allowing you to
35//! create `ClickHouse` tables directly from Arrow schemas:
36//!
37//! ```rust,ignore
38//! use clickhouse_arrow::{Client, ArrowFormat, CreateOptions};
39//! use arrow::datatypes::{Schema, Field, DataType};
40//!
41//! // Define your Arrow schema
42//! let schema = Schema::new(vec![
43//!     Field::new("id", DataType::UInt64, false),
44//!     Field::new("name", DataType::Utf8, false),
45//!     Field::new("status", DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8)), false),
46//! ]);
47//!
48//! // Configure table creation
49//! let options = CreateOptions::new("MergeTree")
50//!     .with_order_by(&["id".to_string()])
51//!     .with_partition_by("toYYYYMM(created_at)")
52//!     .with_setting("index_granularity", 8192);
53//!
54//! // Create the table
55//! client.create_table(None, "my_table", &schema, &options, None).await?;
56//! ```
57//!
58//! #### Schema Conversions for Type Control
59//!
60//! `SchemaConversions` (type alias for `HashMap<String, Type>`) provides fine-grained control over
61//! Arrow-to-ClickHouse type mappings. This is especially important for:
62//!
63//! 1. **Converting Dictionary → Enum**: By default, Arrow Dictionary types map to
64//!    `LowCardinality(String)`. Use `SchemaConversions` to map them to `Enum8` or `Enum16` instead:
65//!
66//! ```rust,ignore
67//! use clickhouse_arrow::{Type, CreateOptions};
68//! use std::collections::HashMap;
69//!
70//! let schema_conversions = HashMap::from([
71//!     // Convert status column from Dictionary to Enum8
72//!     ("status".to_string(), Type::Enum8(vec![
73//!         ("active".to_string(), 0),
74//!         ("inactive".to_string(), 1),
75//!         ("pending".to_string(), 2),
76//!     ])),
77//!     // Convert category to Enum16 for larger enums
78//!     ("category".to_string(), Type::Enum16(vec![
79//!         ("electronics".to_string(), 0),
80//!         ("clothing".to_string(), 1),
81//!         // ... up to 65k values
82//!     ])),
83//! ]);
84//!
85//! let options = CreateOptions::new("MergeTree")
86//!     .with_order_by(&["id".to_string()])
87//!     .with_schema_conversions(schema_conversions);
88//! ```
89//!
90//! 2. **Geo Types**: Preserve geographic types during conversion
91//! 3. **Date Types**: Choose between `Date` and `Date32`
92//! 4. **Custom Type Mappings**: Override any default type conversion
93//!
94//! #### Field Naming Constants
95//!
96//! When working with complex Arrow types, use these constants to ensure compatibility:
97//!
98//! ```rust,ignore
99//! use clickhouse_arrow::arrow::types::*;
100//!
101//! // For List types - inner field is named "item"
102//! let list_field = Field::new("data", DataType::List(
103//!     Arc::new(Field::new(LIST_ITEM_FIELD_NAME, DataType::Int32, true))
104//! ), true);
105//!
106//! // For Struct/Tuple types - fields are named "field_0", "field_1", etc.
107//! let tuple_fields = vec![
108//!     Field::new(format!("{}{}", TUPLE_FIELD_NAME_PREFIX, 0), DataType::Int32, false),
109//!     Field::new(format!("{}{}", TUPLE_FIELD_NAME_PREFIX, 1), DataType::Utf8, false),
110//! ];
111//!
112//! // For Map types - uses specific field names
113//! let map_type = DataType::Map(
114//!     Arc::new(Field::new(MAP_FIELD_NAME, DataType::Struct(
115//!         vec![
116//!             Field::new(STRUCT_KEY_FIELD_NAME, DataType::Utf8, false),
117//!             Field::new(STRUCT_VALUE_FIELD_NAME, DataType::Int32, true),
118//!         ].into()
119//!     ), false)),
120//!     false
121//! );
122//! ```
123//!
124//! These constants ensure your Arrow schemas align with `ClickHouse`'s expectations and maintain
125//! compatibility with arrow-rs conventions.
126//!
127//! ## Queries
128//!
129//! ### Query Settings
130//!
131//! The `clickhouse_arrow::Settings` type allows configuring `ClickHouse` query settings. You can
132//! import it directly:
133//!
134//! ```rust,ignore
135//! use clickhouse_arrow::Settings;
136//! // or via prelude
137//! use clickhouse_arrow::prelude::*;
138//! ```
139//!
140//! Refer to the settings module documentation for details and examples.
141//!
142//! ## Arrow Round-Trip
143//!
144//! There are cases where a round trip may deserialize a different type by schema or array than the
145//! schema and array you used to create the table.
146//!
147//! I will try to maintain an accurate and updated list as they occur. In addition, when possible, I
148//! will provide options or other functionality to alter this behavior.
149//!
150//! #### `(String|Binary)View`/`Large(List|String|Binary)` variations are normalized.
151//! - **Behavior**: `ClickHouse` does not make the same distinction between `Utf8`, `Utf8View`, or
152//!   `LargeUtf8`. All of these are mapped to either `Type::Binary` (the default, see below) or
153//!   `Type::String`.
154//! - **Option**: None
155//! - **Default**: Unsupported
156//! - **Impact**: When deserializing from `ClickHouse`, manual modification will be necessary to use
157//!   these data types.
158//!
159//! #### `Utf8` -> `Binary`
160//! - **Behavior**: By default, `Type::String`/`DataType::Utf8` will be represented as Binary.
161//! - **Option**: `strings_as_strings` (default: `false`).
162//! - **Default**: Disabled (`false`).
163//! - **Impact**: Set to `true` to map `Type::String` -> `DataType::Utf8`. Binary tends to be
164//!   more efficient to work with in high throughput scenarios.
165//!
166//! #### Nullable `Array`s
167//! - **Behavior**: `ClickHouse` does not allow `Nullable(Array(...))`, but insertion with non-null
168//!   data is allowed by default. To modify this behavior, set `array_nullable_error` to `true`.
169//! - **Option**: `array_nullable_error` (default: `false`).
170//! - **Default**: Disabled (`false`).
171//! - **Impact**: Enables flexible insertion but may cause schema mismatches if nulls are present.
172//!
173//! #### `LowCardinality(Nullable(...))` vs `Nullable(LowCardinality(...))`
174//! - **Behavior**: Like arrays mentioned above, `ClickHouse` does not allow nullable low
175//!   cardinality. The default behavior is to push down the nullability.
176//! - **Option**: `low_cardinality_nullable_error` (default: `false`).
177//! - **Default**: Disabled (`false`).
178//! - **Impact**: Enables flexible insertion but may cause schema mismatches if nulls are present.
179//!
180//! #### `Enum8`/`Enum16` vs. `LowCardinality`
181//! - **Behavior**: Arrow `Dictionary` types map to `LowCardinality`, but `ClickHouse` `Enum` types
182//!   may also map to `Dictionary`, altering the type on round-trip.
183//! - **Option**: No options are available; instead, provide hash maps for `enum_i8` and/or
184//!   `enum_i16` to `CreateOptions` during schema creation.
185//! - **Impact**: The default behavior will ignore enums when starting from arrow.
186
187#![allow(unused_crate_dependencies)]
188
189pub mod arrow;
190mod client;
191mod compression;
192mod constants;
193mod errors;
194mod flags;
195mod formats;
196mod io;
197pub mod native;
198#[cfg(feature = "pool")]
199mod pool;
200pub mod prelude;
201mod query;
202mod schema;
203mod settings;
204pub mod spawn;
205pub mod telemetry;
206#[cfg(any(feature = "test-utils", feature = "tmpfs-size"))]
207pub mod test_utils;
208
209#[cfg(feature = "derive")]
210/// Derive macro for the [Row] trait.
211///
212/// This is similar in usage and implementation to the [`serde::Serialize`] and
213/// [`serde::Deserialize`] derive macros.
214///
215/// ## serde attributes
216/// The following [serde attributes](https://serde.rs/attributes.html) are supported, using `#[clickhouse_arrow(...)]` instead of `#[serde(...)]`:
217/// - `with`
218/// - `from` and `into`
219/// - `try_from`
220/// - `skip`
221/// - `default`
222/// - `deny_unknown_fields`
223/// - `rename`
224/// - `rename_all`
225/// - `serialize_with`, `deserialize_with`
226/// - `skip_deserializing`, `skip_serializing`
227/// - `flatten`
228///    - Index-based matching is disabled (the column names must match exactly).
229///    - Due to the current interface of the [Row] trait, performance might not be optimal, as
230///      a value map must be reconstituted for each flattened subfield.
231///
232/// ## ClickHouse-specific attributes
233/// - The `nested` attribute allows handling [ClickHouse nested data structures](https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested).
234///   See an example in the `tests` folder.
235///
236/// ## Known issues
237/// - For serialization, the ordering of fields in the struct declaration must match the order in the `INSERT` statement, respectively in the table declaration. See issue [#34](https://github.com/Protryon/clickhouse_arrow/issues/34).
238pub use clickhouse_arrow_derive::Row;
239pub use client::*;
240/// Set this environment variable to enable additional debug output around arrow (de)serialization.
241pub use constants::{CONN_READ_BUFFER_ENV_VAR, CONN_WRITE_BUFFER_ENV_VAR, DEBUG_ARROW_ENV_VAR};
242pub use errors::*;
243pub use formats::{ArrowFormat, ClientFormat, NativeFormat};
244/// Contains useful top-level traits to interface with [`crate::prelude::NativeFormat`]
245pub use native::convert::*;
246pub use native::progress::Progress;
247pub use native::protocol::{ChunkedProtocolMode, ProfileEvent};
248/// Represents the types that `ClickHouse` supports internally.
249pub use native::types::*;
250/// Contains useful top-level structures to interface with [`crate::prelude::NativeFormat`]
251pub use native::values::*;
252pub use native::{CompressionMethod, ServerError, Severity};
253#[cfg(feature = "pool")]
254pub use pool::*;
255pub use query::{ParamValue, ParsedQuery, Qid, QueryParams};
256pub use schema::CreateOptions;
257pub use settings::{Setting, SettingValue, Settings};
258
259mod aliases { // private module; its public aliases are glob re-exported by the crate root
260    /// Hasher builder that is not cryptographically secure: a
261    /// [`std::hash::BuildHasherDefault`] wrapping [`rustc_hash::FxHasher`].
262    pub type HashBuilder = std::hash::BuildHasherDefault<rustc_hash::FxHasher>;
263    /// An [`indexmap::IndexMap`] keyed through [`HashBuilder`]; not cryptographically secure.
264    pub type FxIndexMap<K, V> = indexmap::IndexMap<K, V, HashBuilder>;
265}
266// Type aliases used throughout the library
267pub use aliases::*;
268// External libraries
269mod reexports { // external crates and types surfaced through the crate root's glob re-export
270    #[cfg(feature = "pool")]
271    pub use bb8; // gated: present only when the `pool` feature is enabled
272    pub use chrono_tz::Tz;
273    pub use indexmap::IndexMap;
274    pub use uuid::Uuid;
275    pub use {rustc_hash, tracing}; // whole crates, re-exported by name
276}
277/// Re-exports
278///
279/// Exporting different external modules used by the library.
280pub use reexports::*;
281
282#[cfg(test)]
283mod dev_deps {
284    //! Exists only to silence rustc's unused-crate-dependencies warnings in test builds.
285    //! See tracking issue [#95513](https://github.com/rust-lang/rust/issues/95513).
286    use {clickhouse as _, criterion as _}; // `as _` references each crate without binding a name
287}