Skip to main content

minarrow/
lib.rs

1// Copyright 2025 Peter Garfield Bower
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! # **Minarrow** – High-Performance Rust with Apache Arrow Compatibility
16//!
17//! Modern Rust implementation of the Apache Arrow zero-copy memory layout,
18//! for high-performance computing, streaming, and embedded systems.
19//! Built for those who like it fast and simple.
20//!
21//! ## Key Features
22//! - **Fast compile times** – typically <2s for standard builds, <0.15s for rebuilds.
23//! - **64-byte SIMD alignment** for optimal CPU utilisation.
24//! - **High runtime performance** – see benchmarks below.
//! - **Typed enum accessors** – `arr.num().f64()` returns a shared handle
//!   (an `Arc` bump) when the variant matches. `try_` variants return `Result`.
27//! - Cohesive, well-documented API with extensive coverage.
28//! - Built-in FFI with simple `to_apache_arrow()` and `to_polars()` conversions.
29//! - Apache-2.0 Licensed.
30//!
31//! ## Quick Start
32//! ```rust
33//! use minarrow::{MaskedArray, Print, arr_f64, fa_i32, fa_str32, tbl};
34//!
35//! // Create arrays with macros
36//! let prices = arr_f64![10.5, 20.0, 15.75];
37//!
38//! // Direct typed access - no downcasting
39//! assert_eq!(prices.num().f64().get(0), Some(10.5));
40//!
41//! // Build tables via FieldArrays with constructor macros
42//! let table = tbl!(
43//!     "users",
44//!     fa_i32!("id", 1, 2, 3),
45//!     fa_str32!("name", "alice", "bob", "charlie"),
46//! );
47//! table.print();
48//! ```
49//!
50//! ## Ecosystem
51//! - [`lightstream`](https://crates.io/crates/lightstream) – zero-copy Arrow IPC streaming
52//!   with SIMD-aligned I/O over Tokio, TCP, QUIC, WebSocket, Unix sockets, and Stdio.
53//! - `minarrow-pyo3` – zero-copy Python interop via PyArrow.
54//! - [`vec64`](https://crates.io/crates/vec64) – the 64-byte aligned Vec backing every buffer.
55//! - [Lightning Analytics Engine](https://spacecell.com) – sub-millisecond, zero-config
56//!   live streaming engine with statistical modelling and data processing, built on Minarrow.
57//!
58//! ## Compatibility
59//! Implements Apache Arrow’s documented memory layouts while simplifying some APIs.
60//! Additional logical types are provided where they add practical value.
61//! Learn more about Apache Arrow at: <https://arrow.apache.org/overview/>.
62//!
63//! Minarrow is not affiliated with Apache Arrow or the Apache Software Foundation.
64//! *Apache Arrow* is a registered trademark of the ASF, referenced under fair use.
65//!
66//! ## Acknowledgements
67//! Thanks to the Apache Arrow community and contributors, with inspiration
68//! from `Arrow2` and `Polars`.
69//!
70//! ## Requirements
//! Requires Rust nightly for features such as `allocator_api` and `portable_simd`.
72//!
73//! ## Benchmarks
74//!
75//! **Intel(R) Core(TM) Ultra 7 155H | x86_64 | 22 CPUs**
76//!
77//! ### No SIMD
78//! ***(n=1000, lanes=4, iters=1000)***
79//!
//! | Case                            | Avg time |
//! |---------------------------------|----------|
//! | `Vec<i64>`                      | 85 ns    |
//! | Minarrow direct IntegerArray    | 88 ns    |
//! | arrow-rs struct Int64Array      | 147 ns   |
//! | Minarrow enum IntegerArray      | 124 ns   |
//! | arrow-rs dyn Int64Array         | 181 ns   |
//! | `Vec<f64>`                      | 475 ns   |
//! | Minarrow direct FloatArray      | 476 ns   |
//! | arrow-rs struct Float64Array    | 527 ns   |
//! | Minarrow enum FloatArray        | 507 ns   |
//! | arrow-rs dyn Float64Array       | 1.952 µs |
92//!
93//! ### SIMD
94//! ***(n=1000, lanes=4, iters=1000)***
95//!
//! | Case                            | Avg time |
//! |---------------------------------|----------|
//! | `Vec<i64>`                      | 64 ns    |
//! | `Vec64<i64>`                    | 55 ns    |
//! | Minarrow direct IntegerArray    | 88 ns    |
//! | arrow-rs struct Int64Array      | 162 ns   |
//! | Minarrow enum IntegerArray      | 170 ns   |
//! | arrow-rs dyn Int64Array         | 173 ns   |
//! | `Vec<f64>`                      | 57 ns    |
//! | `Vec64<f64>`                    | 58 ns    |
//! | Minarrow direct FloatArray      | 91 ns    |
//! | arrow-rs struct Float64Array    | 181 ns   |
//! | Minarrow enum FloatArray        | 180 ns   |
//! | arrow-rs dyn Float64Array       | 196 ns   |
110//!
111//! ### SIMD + Rayon
112//! ***(n=1,000,000,000, lanes=4)***
113//!
//! | Case                              | Time (ms) |
//! |-----------------------------------|-----------|
//! | SIMD + Rayon `IntegerArray<i64>`  | 113.874   |
//! | SIMD + Rayon `FloatArray<f64>`    | 114.095   |
118//!
//! _Construction time for `Vec<i64>` (87 ns) and `Vec64<i64>` (84 ns) is excluded from the benchmarks._
120
121#![feature(allocator_api)]
122#![feature(portable_simd)]
123
124pub use ::vec64::{Vec64, Vec64Alloc, vec64};
125
126/// **Array**, **TextArray**, **NumericArray**...- *All the *High-Level Array containers* are here.*
127pub mod enums {
128    pub mod array;
129    pub mod error;
130    #[cfg(feature = "scalar_type")]
131    pub mod scalar;
132    pub mod time_units;
133    #[cfg(feature = "value_type")]
134    pub mod value;
135    pub mod collections {
136        pub mod numeric_array;
137        #[cfg(feature = "datetime")]
138        pub mod temporal_array;
139        pub mod text_array;
140    }
141    pub mod operators;
142    pub mod shape_dim;
143}
144
145/// Contains SIMD-accelerated kernels for standard arithmetic, string, bitmask and logical operations.
146pub mod kernels {
147    pub mod arithmetic;
148    pub mod bitmask;
149    #[cfg(feature = "broadcast")]
150    pub mod broadcast;
151    #[cfg(feature = "datetime_ops")]
152    pub mod datetime;
153    #[cfg(feature = "views")]
154    pub mod routing;
155    pub mod string;
156}
157
158/// **Table**, **IntegerArray**, **FloatArray**, **Vec64** - *All the **Low-Level Control**, **Tables** and **Views***.
159pub mod structs {
160    #[cfg(feature = "arena")]
161    pub mod arena;
162
163    #[cfg(feature = "chunked")]
164    pub mod chunked {
165        pub mod super_array;
166        pub mod super_table;
167    }
168
169    pub mod variants {
170        pub mod boolean;
171        pub mod categorical;
172        #[cfg(feature = "datetime")]
173        pub mod datetime;
174        pub mod float;
175        pub mod integer;
176        pub mod string;
177    }
178    pub mod views {
179        #[cfg(feature = "views")]
180        #[cfg(feature = "chunked")]
181        pub mod chunked {
182            pub mod super_array_view;
183            pub mod super_table_view;
184        }
185        #[cfg(feature = "views")]
186        pub mod collections {
187            pub mod boolean_array_view;
188            pub mod numeric_array_view;
189            #[cfg(feature = "datetime")]
190            pub mod temporal_array_view;
191            pub mod text_array_view;
192        }
193        #[cfg(feature = "views")]
194        pub mod array_view;
195        pub mod bitmask_view;
196
197        #[cfg(feature = "views")]
198        pub mod table_view;
199    }
200    pub mod bitmask;
201    pub mod buffer;
202    #[cfg(feature = "lbuffer")]
203    pub mod lbuffer;
204    pub mod column;
205    #[cfg(feature = "shared_dict")]
206    pub mod dictionary;
207    #[cfg(feature = "cube")]
208    pub mod cube;
209    pub mod field;
210    pub mod field_array;
211    #[cfg(feature = "matrix")]
212    pub mod matrix;
213    pub mod shared_buffer;
214    pub mod table;
215}
216
217/// **Shared Memory** - *For sending data to other runtime(s) over FFI.*
218pub mod ffi {
219    pub mod arrow_c_ffi;
220    pub mod arrow_dtype;
221    pub mod schema;
222    #[cfg(feature = "cast_arrow")]
223    pub mod arrow_rs;
224    #[cfg(feature = "cast_polars")]
225    pub mod polars;
226}
227
228/// **Type Standardisation** - `MaskedArray`, `View`, `Print` traits + more,
229pub mod traits {
230    #[cfg(feature = "size")]
231    pub mod byte_size;
232    pub mod combine;
233    pub mod concatenate;
234    #[cfg(feature = "chunked")]
235    pub mod consolidate;
236    pub mod custom_value;
237    #[cfg(feature = "datetime_ops")]
238    pub mod datetime_ops;
239    pub mod masked_array;
240    pub mod print;
241    #[cfg(feature = "select")]
242    pub mod selection;
243    pub mod shape;
244    pub mod type_unions;
245    #[cfg(feature = "views")]
246    pub mod view;
247}
248
249pub mod aliases;
250pub mod conversions;
251pub mod macros;
252pub mod utils;
253
254pub use aliases::{
255    ArrayVT, BitmaskVT, BooleanAVT, BytesLength, CategoricalAVT, CategoricalAVTExt, DictLength,
256    FloatAVT, IntegerAVT, Length, Offset, StringAVT, StringAVTExt,
257};
258
259#[cfg(feature = "datetime")]
260pub use aliases::DatetimeAVT;
261pub use enums::array::Array;
262pub use enums::collections::numeric_array::NumericArray;
263#[cfg(feature = "datetime")]
264pub use enums::collections::temporal_array::TemporalArray;
265pub use enums::collections::text_array::TextArray;
266#[cfg(feature = "scalar_type")]
267pub use enums::scalar::Scalar;
268#[cfg(feature = "datetime")]
269pub use enums::time_units::{IntervalUnit, TimePeriod, TimeUnit};
270#[cfg(feature = "value_type")]
271pub use enums::value::Value;
272
273#[cfg(feature = "arena")]
274pub use structs::arena::{AAMaker, Arena, ArenaRegion};
275pub use structs::bitmask::Bitmask;
276pub use structs::buffer::Buffer;
277#[cfg(feature = "lbuffer")]
278pub use structs::lbuffer::{LBuffer, LBufferV};
279#[cfg(feature = "chunked")]
280pub use structs::chunked::{
281    super_array::{RechunkStrategy, SuperArray},
282    super_table::SuperTable,
283};
284#[cfg(feature = "views")]
285pub use structs::views::array_view::ArrayV;
286pub use structs::views::bitmask_view::BitmaskV;
287#[cfg(feature = "views")]
288#[cfg(feature = "chunked")]
289pub use structs::views::chunked::{super_array_view::SuperArrayV, super_table_view::SuperTableV};
290#[cfg(feature = "views")]
291pub use structs::views::collections::boolean_array_view::BooleanArrayV;
292#[cfg(feature = "views")]
293pub use structs::views::collections::numeric_array_view::NumericArrayV;
294#[cfg(feature = "views")]
295#[cfg(feature = "datetime")]
296pub use structs::views::collections::temporal_array_view::TemporalArrayV;
297#[cfg(feature = "views")]
298pub use structs::views::collections::text_array_view::TextArrayV;
299
300pub use ffi::arrow_dtype::ArrowType;
301pub use structs::column::{Column, column};
302#[cfg(feature = "shared_dict")]
303pub use structs::dictionary::Dictionary;
304#[cfg(feature = "cube")]
305pub use structs::cube::Cube;
306pub use structs::field::Field;
307pub use structs::field_array::{FieldArray, field_array};
308#[cfg(feature = "matrix")]
309pub use structs::matrix::Matrix;
310pub use structs::shared_buffer::SharedBuffer;
311pub use structs::table::Table;
312pub use structs::variants::boolean::BooleanArray;
313pub use structs::variants::categorical::CategoricalArray;
314#[cfg(feature = "datetime")]
315pub use structs::variants::datetime::DatetimeArray;
316#[cfg(feature = "datetime")]
317pub use structs::variants::datetime::parse::{parse_iso8601_utc, parse_iso8601_utc_ns};
318pub use structs::variants::float::FloatArray;
319pub use structs::variants::integer::IntegerArray;
320pub use structs::variants::string::StringArray;
321#[cfg(feature = "views")]
322pub use structs::views::table_view::TableV;
323#[cfg(feature = "size")]
324pub use traits::byte_size::ByteSize;
325pub use traits::combine::Combine;
326pub use traits::concatenate::Concatenate;
327#[cfg(feature = "chunked")]
328pub use traits::consolidate::Consolidate;
329#[cfg(feature = "datetime_ops")]
330pub use traits::datetime_ops::DatetimeOps;
331pub use traits::masked_array::MaskedArray;
332pub use traits::print::Print;
333#[cfg(feature = "select")]
334pub use traits::selection::{ColumnSelection, RowSelection, Selection2D};
335pub use traits::type_unions::{Float, Integer, Numeric, Primitive};