atlas/lib.rs
1#![warn(missing_docs)]
2
3//! ATLAS (Aggregated Tensor Large Array Store) is a directory-based store for thousands of named datasets.
4//!
5//! Each dataset is a virtual collection of named N-dimensional arrays with per-dataset and
6//! per-array attributes, backed by the `array-format` crate. Datasets sharing an array name
7//! are co-located in the same physical file, keyed by dataset name.
8//!
9//! # Layout
10//!
11//! ```text
//! my_store/
//! ├── atlas.json        <- dataset registry + per-dataset attributes
//! ├── temperature/
//! │   └── data.af       <- ArrayFile: one named array per dataset
//! └── latitude/
//!     └── data.af
18//! ```
19//!
20//! # Quick start
21//!
22//! ```
23//! use atlas::{Atlas, Attr, StoreConfig};
24//! use ndarray::Array2;
25//!
26//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
27//! let tmp = tempfile::tempdir().unwrap();
28//!
29//! // Create — codec persists in atlas.json so `open_path` doesn't need it.
30//! let mut s = Atlas::create_path(tmp.path(), StoreConfig::default()).await.unwrap();
31//! {
32//! let mut ds = s.create_dataset("jan_2024").await.unwrap();
33//! ds.define_array::<f32>(
34//! "temperature",
35//! vec!["lat".into(), "lon".into()],
36//! vec![4, 8],
37//! None, // chunk_shape — defaults to full shape (one chunk)
38//! None, // fill_value
39//! ).await.unwrap();
40//! let data = Array2::<f32>::from_elem([4, 8], 20.0).into_dyn();
41//! ds.write_array("temperature", vec![0, 0], data.view()).await.unwrap();
42//! ds.set_attribute("month", Attr::Int64(1));
43//! }
44//! s.flush().await.unwrap(); // single durability boundary
45//!
46//! // Reopen — no config needed.
47//! let s2 = Atlas::open_path(tmp.path()).await.unwrap();
48//! let ds2 = s2.open_dataset("jan_2024").await.unwrap();
49//! let temp = ds2.read_array::<f32>("temperature", vec![], vec![]).await.unwrap().unwrap();
50//! assert_eq!(temp.shape(), &[4, 8]);
51//! assert_eq!(temp[[0, 0]], 20.0);
52//! # });
53//! ```
54//!
55//! # Thread safety
56//!
57//! `Atlas` and `DatasetView` are `Send + Sync`. Each physical array file
58//! is guarded by a `tokio::sync::RwLock`: concurrent reads (`read_array`,
59//! `array_stats`) proceed in parallel without contention, while writes
60//! (`write_array`, `define_array`, `flush`, `compact`, …) take an exclusive
61//! lock. The cache map uses a `parking_lot::RwLock` that is never held across
62//! an `await` point.
63//!
64//! # Durability
65//!
66//! `atlas.json` is loaded **once** when the store is opened or created; from
67//! then on every mutation (`create_dataset`, `define_array`, `set_attribute`,
68//! …) only touches the in-memory `StoreMeta`. The store does **not** persist
69//! until [`Atlas::flush`] is called. Dropping an `Atlas`
70//! without flushing abandons every pending in-memory write.
71
72mod array;
73mod config;
74mod dataset;
75mod error;
76mod meta;
77mod schema;
78mod store;
79
80pub use config::{Codec, MetaFormat, StoreConfig};
81pub use dataset::DatasetView;
82pub use error::{Error, Result};
83pub use meta::DatasetMeta;
84pub use store::Atlas;
85
86pub use array_format::{
87 ArrayElement, ArrayStats, DType, DeltaCache, FillValue, MergedArrayMeta, StatValue, TimestampNs,
88};
89pub use schema::{ArraySchema, Attr};
90
91pub(crate) fn validate_name(name: &str) -> Result<()> {
92 if name.is_empty() || name.starts_with('_') || name.contains('/') || name == ".." || name == "."
93 {
94 return Err(Error::InvalidName(name.to_string()));
95 }
96 Ok(())
97}
98
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when `validate_name` fails with `Error::InvalidName`
    /// for `candidate`.
    fn is_rejected(candidate: &str) -> bool {
        matches!(validate_name(candidate), Err(Error::InvalidName(_)))
    }

    /// Creates a store, defines one array in a dataset, flushes, then reopens
    /// the store from the same directory and checks the array is listed.
    #[tokio::test]
    async fn create_and_read_dataset() {
        let store_dir = tempfile::tempdir().unwrap();
        let root = store_dir.path();

        let mut writer = Atlas::create_path(root, StoreConfig::default())
            .await
            .unwrap();
        let mut dataset = writer.create_dataset("ds").await.unwrap();
        dataset
            .define_array::<f32>("temp", vec!["x".into()], vec![4], None, None)
            .await
            .unwrap();
        // Release the dataset view before flushing, and the store before
        // reopening, mirroring the scopes these values would otherwise live in.
        drop(dataset);
        writer.flush().await.unwrap();
        drop(writer);

        let reader = Atlas::open_path(root).await.unwrap();
        let reopened = reader.open_dataset("ds").await.unwrap();
        assert_eq!(reopened.list_arrays(), vec![String::from("temp")]);
    }

    /// Ordinary names, including ones with `-` and an inner `.`, are accepted.
    #[test]
    fn valid_names_pass() {
        const ACCEPTED: [&str; 5] = ["temperature", "my-array", "x1", "lat.lon", "a"];
        for name in ACCEPTED {
            assert!(validate_name(name).is_ok(), "expected '{name}' to be valid");
        }
    }

    /// The empty string is not a valid name.
    #[test]
    fn empty_name_rejected() {
        assert!(is_rejected(""));
    }

    /// Names beginning with `_` are rejected, including a lone `_`.
    #[test]
    fn leading_underscore_rejected() {
        assert!(is_rejected("_hidden"));
        assert!(is_rejected("_"));
    }

    /// A `/` anywhere in the name is rejected.
    #[test]
    fn slash_in_name_rejected() {
        assert!(is_rejected("a/b"));
        assert!(is_rejected("/abs"));
    }

    /// The parent-directory name `..` is rejected.
    #[test]
    fn dotdot_rejected() {
        assert!(is_rejected(".."));
    }

    /// The current-directory name `.` is rejected.
    #[test]
    fn single_dot_rejected() {
        assert!(is_rejected("."));
    }
}
162
/// Compile-time checks that the public handle types are `Send` and `Sync`.
///
/// The tests have no runtime assertions: each one only compiles if the named
/// type satisfies the marker-trait bound on the helper it instantiates.
#[cfg(test)]
mod send_check {
    use super::*;

    /// Instantiable only for types that are `Send`.
    fn require_send<T>()
    where
        T: Send,
    {
    }

    /// Instantiable only for types that are `Sync`.
    fn require_sync<T>()
    where
        T: Sync,
    {
    }

    #[test]
    fn store_send() {
        require_send::<Atlas>();
    }

    #[test]
    fn view_send() {
        require_send::<DatasetView>();
    }

    #[test]
    fn store_sync() {
        require_sync::<Atlas>();
    }

    #[test]
    fn view_sync() {
        require_sync::<DatasetView>();
    }
}