vortex_array/lib.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3//! Vortex crate containing core logic for encoding and memory representation of [arrays](ArrayRef).
4//!
5//! At the heart of Vortex are [arrays](ArrayRef).
6//!
7//! Arrays are typed views of memory buffers that hold [scalars](crate::scalar::Scalar). These
8//! buffers can be held in a number of physical encodings to perform lightweight compression that
9//! exploits the particular data distribution of the array's values.
10//!
11//! Every data type recognized by Vortex also has a canonical physical encoding format, which
12//! arrays can be [canonicalized](Canonical) into for ease of access in compute functions.
13//!
14//! # Core Handles
15//!
16//! [`ArrayRef`] is the erased, shared handle used by most public APIs. It carries the logical
17//! [`DType`], row count, encoding id, children, buffers, and statistics for an
18//! array tree. Use it when an API should accept any encoding.
19//!
20//! [`Array<V>`] is the typed owned handle for a known encoding `V: VTable`. It wraps an
21//! [`ArrayRef`] and dereferences to the encoding-specific `V::TypedArrayData`.
22//!
23//! [`ArrayView<V>`] is the lightweight typed borrow handed to vtable methods. It exposes both the
24//! shared [`ArrayRef`] metadata and the encoding-specific data without cloning the handle.
25//!
26//! [`ArrayParts<V>`] is the construction boundary for typed arrays. It groups externally supplied
27//! logical metadata and encoding data, then [`Array::try_from_parts`] validates that they agree.
28//!
29//! # Logical Types and Physical Encodings
30//!
31//! A [`DType`] describes the logical values an array may hold. It does not
32//! describe the memory layout. For example, a `DType::Primitive(I32, Nullable)` can be stored as a
33//! canonical [`PrimitiveArray`], a dictionary, a slice, or a
34//! compressed external encoding.
35//!
36//! The [`Canonical`] enum names the default uncompressed encoding for each logical family. Execution
37//! normally moves an array tree toward canonical form, but canonicalization is shallow: children of
38//! canonical struct/list arrays may still be encoded.
39//!
40//! # Built-in, Lazy, and Experimental Arrays
41//!
42//! Built-in arrays live in [`arrays`]. Some are canonical (`PrimitiveArray`, `StructArray`,
43//! `VarBinViewArray`); others are utility or lazy arrays such as [`ChunkedArray`],
44//! [`ConstantArray`], [`FilterArray`], [`SliceArray`], and [`ScalarFnArray`].
45//! Lazy arrays defer work so compute kernels can operate on encoded data or prune children
46//! before materialization.
47//!
//! Experimental arrays are public because they are used inside Vortex, but their storage contracts
//! may still change. Prefer the higher-level constructors and accessors documented on each array
//! module rather than relying on child slot order.
51//!
52//! # Nulls and Scalars
53//!
54//! [`Validity`](crate::validity::Validity) separates nullness from values. It can be a cheap
55//! constant state (`NonNullable`, `AllValid`, `AllInvalid`) or a boolean array that may itself be
56//! encoded. [`Scalar`](crate::scalar::Scalar) is the single-value counterpart: it pairs a
57//! [`DType`] with an optional [`ScalarValue`](crate::scalar::ScalarValue).
58//!
59//! # Extending Vortex
60//!
61//! New array encodings implement [`VTable`], usually through the local `array_slots!` and
62//! `vtable!` patterns used by built-ins. The important extension contracts are:
63//!
64//! - [`VTable::validate`] checks that externally supplied dtype, length, slots, and data agree.
65//! - [`VTable::execute`] returns an [`ExecutionResult`] that makes progress toward canonical form.
66//! - [`OperationsVTable`] provides scalar access.
67//! - [`ValidityVTable`] exposes validity only for nullable arrays.
68//!
69//! New logical extension dtypes implement [`ExtVTable`](crate::dtype::extension::ExtVTable) and
70//! store values in an ordinary Vortex storage dtype.
71//!
72//! [`PrimitiveArray`]: crate::arrays::PrimitiveArray
73//! [`DType`]: crate::dtype::DType
74//! [`ChunkedArray`]: crate::arrays::ChunkedArray
75//! [`ConstantArray`]: crate::arrays::ConstantArray
76//! [`FilterArray`]: crate::arrays::FilterArray
77//! [`SliceArray`]: crate::arrays::SliceArray
78//! [`ScalarFnArray`]: crate::arrays::ScalarFnArray
79
80extern crate self as vortex_array;
81
82use std::sync::LazyLock;
83
84pub use array::*;
85pub use canonical::*;
86pub use columnar::*;
87pub use executor::*;
88pub use hash::*;
89pub use mask_future::*;
90pub use metadata::*;
91pub use smallvec;
92pub use vortex_array_macros::array_slots;
93use vortex_session::SessionExt;
94use vortex_session::VortexSession;
95use vortex_session::registry::Context;
96
97use crate::aggregate_fn::session::AggregateFnSession;
98use crate::arrow::ArrowSession;
99use crate::dtype::session::DTypeSession;
100use crate::memory::MemorySession;
101use crate::optimizer::kernels::KernelSession;
102use crate::scalar_fn::session::ScalarFnSession;
103use crate::session::ArraySession;
104use crate::stats::session::StatsSession;
105
106pub mod aggregate_fn;
107#[doc(hidden)]
108pub mod aliases;
109mod arc_swap_map;
110mod array;
111pub mod arrays;
112pub mod arrow;
113pub mod buffer;
114pub mod builders;
115pub mod builtins;
116mod canonical;
117mod columnar;
118pub mod compute;
119pub mod display;
120pub mod dtype;
121mod executor;
122pub mod expr;
123mod expression;
124pub mod extension;
125mod hash;
126pub mod iter;
127pub mod kernel;
128pub mod mask;
129mod mask_future;
130pub mod matcher;
131pub mod memory;
132mod metadata;
133pub mod normalize;
134pub mod optimizer;
135mod partial_ord;
136pub mod patches;
137pub mod scalar;
138pub mod scalar_fn;
139pub mod search_sorted;
140pub mod serde;
141pub mod session;
142pub mod stats;
143pub mod stream;
144#[cfg(any(test, feature = "_test-harness"))]
145pub mod test_harness;
146pub mod validity;
147pub mod variants;
148
149pub mod flatbuffers {
150 //! Re-exported autogenerated code from the core Vortex flatbuffer definitions.
151 pub use vortex_flatbuffers::array::*;
152}
153
154/// Register vortex-array's built-in session-scoped kernels into the active
155/// [`ArrayKernels`](crate::optimizer::kernels::ArrayKernels) registry.
156///
157/// If the session contains a [`KernelSession`], this registers into its registry. Sessions that use
158/// [`KernelSession::default`] already receive these built-in kernels.
159pub fn initialize(session: &VortexSession) {
160 if session.get_opt::<KernelSession>().is_some() {
161 arrays::initialize(session);
162 }
163}
164
165/// Builds a fresh [`VortexSession`] registered with all of vortex-array's built-in session
166/// variables: arrays, dtypes, scalar functions, stats, optimizer kernels, aggregate functions,
167/// Arrow conversion, and memory.
168///
169/// Each call returns an independent session (with its own registries), so callers may register
170/// additional encodings or kernels into it without affecting any other session. This does not
171/// register file, layout, or runtime state — those live in higher-level crates.
172pub fn array_session() -> VortexSession {
173 VortexSession::empty()
174 .with::<ArraySession>()
175 .with::<KernelSession>()
176 .with::<DTypeSession>()
177 .with::<ScalarFnSession>()
178 .with::<StatsSession>()
179 .with::<AggregateFnSession>()
180 .with::<ArrowSession>()
181 .with::<MemorySession>()
182}
183
/// Shared fallback session, built by [`array_session`] the first time it is dereferenced.
///
/// It exists as a stop-gap for code paths that have no session of their own to pass along; see
/// the TODO below for the motivating case.
// TODO(ngates): canonicalize doesn't currently take a session, therefore we cannot invoke execute
// from the new array encodings to support back-compat for legacy encodings. So we hold a session
// here...
pub static LEGACY_SESSION: LazyLock<VortexSession> = LazyLock::new(array_session);
188
/// Alias for the session-registry [`Context`] specialised to [`ArrayPluginRef`] entries.
pub type ArrayContext = Context<ArrayPluginRef>;