Skip to main content

array_format/
lib.rs

1//! # array-format
2//!
3//! A block-backed, footer-indexed container for storing many n-dimensional
4//! arrays in a single file.
5//!
6//! The format uses a **delta/overlay architecture**: each flush produces a
7//! self-describing sidecar file that stacks on top of the base, recording only
8//! the chunks that changed. Reads fall through to older layers for unchanged
9//! chunks, and layers can be merged back into a single file with
10//! [`compact`](ArrayFile::compact).
11//!
12//! ## Features
13//!
14//! - Store many arrays in one object (or a small set of related sidecar files).
15//! - Append arrays and update individual chunks without rewriting the whole file.
16//! - Per-block compression (LZ4, Zstd, or none) recorded in the block table, so
17//!   readers need no configuration to decode a file.
18//! - Chunked or single-chunk layouts with coordinate-addressed reads.
19//! - Logical deletes with periodic compaction to reclaim space.
20//! - Works with any [`object_store`]-compatible backend (local filesystem,
21//!   S3, GCS, Azure).
22//!
23//! ## Quick start
24//!
25//! ```
26//! use array_format::{ArrayFile, FileConfig, Lz4Codec};
27//! use ndarray::Array;
28//!
29//! # async fn example() -> array_format::Result<()> {
30//! // An in-memory file; use `ArrayFile::create(store, path, config)` for persistent storage.
31//! let mut file = ArrayFile::create_memory(FileConfig::new(Lz4Codec)).await?;
32//!
33//! // Define and write a 1-D f32 array.
34//! file.define_array::<f32>("signal", vec!["t".into()], vec![4], None, None)?;
35//! let data = Array::from_vec(vec![1.0f32, 2.0, 3.0, 4.0]).into_dyn();
36//! file.write_array("signal", vec![0], data.view()).await?;
37//!
38//! // Read it back — `vec![], vec![]` means "the whole array".
39//! let out = file.read_array::<f32>("signal", vec![], vec![]).await?;
40//! assert_eq!(out.len(), 4);
41//! # Ok(())
42//! # }
43//! ```
44//!
45//! ## Architecture
46//!
47//! The crate is organized in layers:
48//!
49//! | Layer | Purpose | Key types |
50//! |-------|---------|-----------|
51//! | 0 — Core | Primitives | [`DType`], [`ChunkAddress`], [`BlockId`], [`Error`] |
52//! | 1 — Metadata | Array description | [`MergedArrayMeta`], [`FillValue`] |
53//! | 2 — Codecs | Compression extension point | [`CompressionCodec`] |
54//! | 3 — Runtime | Read / write / compact | [`ArrayFile`] |
55//!
56//! [`CompressionCodec`] is the extension point: implement it to plug in custom
57//! compression algorithms. Storage is provided through any
58//! [`object_store`]-compatible backend (passed to [`ArrayFile::create`]); for
59//! tests and ephemeral use, [`ArrayFile::create_memory`] uses `object_store`'s
60//! in-memory backend.
61//!
62//! [`ChunkAddress`]: address::ChunkAddress
63//! [`BlockId`]: address::BlockId
64//! [`object_store`]: https://docs.rs/object_store
65
66#![warn(missing_docs)] // lint: warn when a public item has no documentation
67
68// ── Layer 0: Core types ─────────────────────────────────────────────
69pub mod address;
70mod delta; // private module; `DeltaCache` is re-exported at the crate root below
71pub mod dtype;
72pub mod error;
73
74// ── Layer 1: Metadata ───────────────────────────────────────────────
75pub mod block;
76mod footer; // private module (no `pub`)
77pub mod layout;
78
79// ── Layer 2: Codec extension trait and storage ──────────────────────
80pub mod codec;
81mod storage; // private module (no `pub`)
82
83// ── Layer 3: Runtime ────────────────────────────────────────────────
84pub mod array;
85pub mod file;
86pub mod stats;
87
88mod ndarray_ext; // private module (no `pub`)
89pub mod timestamp;
90
91// ── Public re-exports ───────────────────────────────────────────────
92pub use array::ArrayElement;
93pub use codec::{CompressionCodec, Lz4Codec, NoCompression, ZstdCodec};
94pub use delta::DeltaCache;
95pub use dtype::DType;
96pub use error::{Error, Result};
97pub use file::{
98    ArrayFile, DEFAULT_BLOCK_TARGET_SIZE, DEFAULT_CACHE_CAPACITY, DEFAULT_IO_CACHE_CAPACITY,
99    FileConfig, MergedArrayMeta,
100};
101pub use layout::{AttributeValue, FillValue};
102pub use stats::{ArrayStats, StatValue, StatsFile};
103pub use timestamp::TimestampNs;