minarrow/
lib.rs

1//! # **Minarrow** – High-Performance Rust with Apache Arrow Compatibility
2//!
3//! Modern Rust implementation of the Apache Arrow zero-copy memory layout,
4//! for high-performance computing, streaming, and embedded systems.
5//! Built for those who like it fast and simple.
6//!
7//! ## Key Features
8//! - **Fast compile times** – typically <1.5s for standard builds, <0.15s for rebuilds.
9//! - **64-byte SIMD alignment** for optimal CPU utilisation.
10//! - **High runtime performance** – see benchmarks below.
11//! - Cohesive, well-documented API with extensive coverage.
12//! - Built-in FFI with simple `to_apache_arrow()` and `to_polars()` conversions.
13//! - MIT Licensed.
14//!
15//! ## Upcoming Additions
16//! 1. **Lightstream-IO** – IPC streaming and Tokio async integration.  
17//! 2. **SIMD Kernels** – Large library of pre-optimised computation kernels.  
18//!
19//! ## Compatibility
20//! Implements Apache Arrow’s documented memory layouts while simplifying some APIs.
21//! Additional logical types are provided where they add practical value.
22//! Learn more about Apache Arrow at: <https://arrow.apache.org/overview/>.  
23//!
24//! Minarrow is not affiliated with Apache Arrow or the Apache Software Foundation.
25//! *Apache Arrow* is a registered trademark of the ASF, referenced under fair use.
26//!
27//! ## Acknowledgements
28//! Thanks to the Apache Arrow community and contributors, with inspiration
29//! from `Arrow2` and `Polars`.
30//!
31//! ## Requirements
32//! Requires Rust nightly for features such as `allocator_api`.
33//!
34//! ## Benchmarks
35//!
36//! **Intel(R) Core(TM) Ultra 7 155H | x86_64 | 22 CPUs**  
37//!
38//! ### No SIMD
39//! ***(n=1000, lanes=4, iters=1000)***
40//!
//! | Case                            | Avg time |
//! |---------------------------------|----------|
//! | `Vec<i64>`                      | 85 ns    |
//! | Minarrow direct IntegerArray    | 88 ns    |
//! | arrow-rs struct Int64Array      | 147 ns   |
//! | Minarrow enum IntegerArray      | 124 ns   |
//! | arrow-rs dyn Int64Array         | 181 ns   |
//! | `Vec<f64>`                      | 475 ns   |
//! | Minarrow direct FloatArray      | 476 ns   |
//! | arrow-rs struct Float64Array    | 527 ns   |
//! | Minarrow enum FloatArray        | 507 ns   |
//! | arrow-rs dyn Float64Array       | 1.952 µs |
53//!
54//! ### SIMD
55//! ***(n=1000, lanes=4, iters=1000)***
56//!
//! | Case                            | Avg time |
//! |---------------------------------|----------|
//! | `Vec<i64>`                      | 64 ns    |
//! | `Vec64<i64>`                    | 55 ns    |
//! | Minarrow direct IntegerArray    | 88 ns    |
//! | arrow-rs struct Int64Array      | 162 ns   |
//! | Minarrow enum IntegerArray      | 170 ns   |
//! | arrow-rs dyn Int64Array         | 173 ns   |
//! | `Vec<f64>`                      | 57 ns    |
//! | `Vec64<f64>`                    | 58 ns    |
//! | Minarrow direct FloatArray      | 91 ns    |
//! | arrow-rs struct Float64Array    | 181 ns   |
//! | Minarrow enum FloatArray        | 180 ns   |
//! | arrow-rs dyn Float64Array       | 196 ns   |
71//!
72//! ### SIMD + Rayon
73//! ***(n=1,000,000,000, lanes=4)***
74//!
//! | Case                              | Time (ms) |
//! |-----------------------------------|-----------|
//! | SIMD + Rayon `IntegerArray<i64>`  | 113.874   |
//! | SIMD + Rayon `FloatArray<f64>`    | 114.095   |
79//!
//! _Construction time for `Vec<i64>` (87 ns) and `Vec64<i64>` (84 ns) is excluded from the benchmarks._
81
82#![feature(allocator_api)]
83#![feature(slice_ptr_get)]
84#![feature(portable_simd)]
85
86pub use vec64::{Vec64, vec64};
87
88/// **Array**, **TextArray**, **NumericArray**...- *All the *High-Level Array containers* are here.*
89pub mod enums {
90    pub mod array;
91    pub mod error;
92    #[cfg(feature = "scalar_type")]
93    pub mod scalar;
94    pub mod time_units;
95    #[cfg(feature = "value_type")]
96    pub mod value;
97    pub mod collections {
98        pub mod numeric_array;
99        #[cfg(feature = "datetime")]
100        pub mod temporal_array;
101        pub mod text_array;
102    }
103    pub mod operators;
104    pub mod shape_dim;
105}
106
107/// Contains SIMD-accelerated kernels for the 'essentials' that are highly coupled to this crate
108/// The extensive set is available downstream in the simd-kernels crate, including a full
109/// set of univariate distributions.
110pub mod kernels {
111    pub mod arithmetic;
112    pub mod bitmask;
113    #[cfg(feature = "broadcast")]
114    pub mod broadcast;
115    #[cfg(feature = "views")]
116    pub mod routing;
117    pub mod string;
118}
119
120/// **Table**, **IntegerArray**, **FloatArray**, **Vec64** - *All the **Low-Level Control**, **Tables** and **Views***.
121pub mod structs {
122
123    #[cfg(feature = "chunked")]
124    pub mod chunked {
125        pub mod super_array;
126        pub mod super_table;
127    }
128
129    pub mod variants {
130        pub mod boolean;
131        pub mod categorical;
132        #[cfg(feature = "datetime")]
133        pub mod datetime;
134        pub mod float;
135        pub mod integer;
136        pub mod string;
137    }
138    pub mod views {
139        #[cfg(feature = "views")]
140        #[cfg(feature = "chunked")]
141        pub mod chunked {
142            pub mod super_array_view;
143            pub mod super_table_view;
144        }
145        #[cfg(feature = "views")]
146        pub mod collections {
147            pub mod numeric_array_view;
148            #[cfg(feature = "datetime")]
149            pub mod temporal_array_view;
150            pub mod text_array_view;
151        }
152        #[cfg(feature = "views")]
153        pub mod array_view;
154        pub mod bitmask_view;
155
156        #[cfg(feature = "views")]
157        pub mod table_view;
158    }
159    pub mod bitmask;
160    pub mod buffer;
161    pub mod column;
162    #[cfg(feature = "cube")]
163    pub mod cube;
164    pub mod field;
165    pub mod field_array;
166    #[cfg(feature = "matrix")]
167    pub mod matrix;
168    pub mod shared_buffer;
169    pub mod table;
170}
171
172/// **Shared Memory** - *Sending data over FFI like a Pro? Look here.*
173pub mod ffi {
174    pub mod arrow_c_ffi;
175    pub mod arrow_dtype;
176    pub mod schema;
177}
178
179/// **Type Standardisation** - `MaskedArray`, `View`, `Print` traits + more,
180pub mod traits {
181    #[cfg(feature = "size")]
182    pub mod byte_size;
183    pub mod concatenate;
184    pub mod custom_value;
185    pub mod masked_array;
186    pub mod print;
187    #[cfg(feature = "select")]
188    pub mod selection;
189    pub mod shape;
190    pub mod type_unions;
191    #[cfg(feature = "views")]
192    pub mod view;
193}
194
195pub mod aliases;
196pub mod conversions;
197pub mod macros;
198pub mod utils;
199
200pub use aliases::{
201    ArrayVT, BitmaskVT, BooleanAVT, BytesLength, CategoricalAVT, CategoricalAVTExt, DictLength,
202    FloatAVT, IntegerAVT, Length, Offset, StringAVT, StringAVTExt,
203};
204
205#[cfg(feature = "datetime")]
206pub use aliases::DatetimeAVT;
207pub use enums::array::Array;
208pub use enums::collections::numeric_array::NumericArray;
209#[cfg(feature = "datetime")]
210pub use enums::collections::temporal_array::TemporalArray;
211pub use enums::collections::text_array::TextArray;
212#[cfg(feature = "scalar_type")]
213pub use enums::scalar::Scalar;
214#[cfg(feature = "datetime")]
215pub use enums::time_units::{IntervalUnit, TimeUnit};
216#[cfg(feature = "value_type")]
217pub use enums::value::Value;
218
219pub use structs::bitmask::Bitmask;
220pub use structs::buffer::Buffer;
221#[cfg(feature = "chunked")]
222pub use structs::chunked::{super_array::SuperArray, super_table::SuperTable};
223#[cfg(feature = "views")]
224pub use structs::views::array_view::ArrayV;
225pub use structs::views::bitmask_view::BitmaskV;
226#[cfg(feature = "views")]
227#[cfg(feature = "chunked")]
228pub use structs::views::chunked::{super_array_view::SuperArrayV, super_table_view::SuperTableV};
229#[cfg(feature = "views")]
230pub use structs::views::collections::numeric_array_view::NumericArrayV;
231#[cfg(feature = "views")]
232#[cfg(feature = "datetime")]
233pub use structs::views::collections::temporal_array_view::TemporalArrayV;
234#[cfg(feature = "views")]
235pub use structs::views::collections::text_array_view::TextArrayV;
236
237pub use ffi::arrow_dtype::ArrowType;
238pub use structs::column::{Column, column};
239#[cfg(feature = "cube")]
240pub use structs::cube::Cube;
241pub use structs::field::Field;
242pub use structs::field_array::FieldArray;
243#[cfg(feature = "matrix")]
244pub use structs::matrix::Matrix;
245pub use structs::shared_buffer::SharedBuffer;
246pub use structs::table::Table;
247pub use structs::variants::boolean::BooleanArray;
248pub use structs::variants::categorical::CategoricalArray;
249#[cfg(feature = "datetime")]
250pub use structs::variants::datetime::DatetimeArray;
251pub use structs::variants::float::FloatArray;
252pub use structs::variants::integer::IntegerArray;
253pub use structs::variants::string::StringArray;
254#[cfg(feature = "views")]
255pub use structs::views::table_view::TableV;
256#[cfg(feature = "size")]
257pub use traits::byte_size::ByteSize;
258pub use traits::concatenate::Concatenate;
259pub use traits::masked_array::MaskedArray;
260pub use traits::print::Print;
261pub use traits::type_unions::{Float, Integer, Numeric, Primitive};