zarrs/
config.rs

1//! `zarrs` global configuration options.
2//!
3//! See [`Config`] for the list of options.
4
5use std::sync::{LazyLock, RwLock, RwLockReadGuard, RwLockWriteGuard};
6
7use zarrs_registry::{
8    ExtensionAliasesCodecV2, ExtensionAliasesCodecV3, ExtensionAliasesDataTypeV2,
9    ExtensionAliasesDataTypeV3,
10};
11
12#[cfg(doc)]
13use crate::array::{codec::CodecOptions, ArrayMetadataOptions};
14
15/// Global configuration options for the `zarrs` crate.
16///
17/// Retrieve the global [`Config`] with [`global_config`] and modify it with [`global_config_mut`].
18///
19/// ## Codec / Chunk Options
20///
21/// ### Validate Checksums
22///  > default: [`true`]
23///
24/// [`CodecOptions::validate_checksums()`] defaults to [`Config::validate_checksums()`].
25///
26/// If validate checksums is enabled, checksum codecs (e.g. `crc32c`, `fletcher32`) will validate that encoded data matches stored checksums, otherwise validation is skipped.
27/// Note that regardless of this configuration option, checksum codecs may skip validation when partial decoding.
28///
29/// ### Store Empty Chunks
30///  > default: [`false`]
31///
32/// [`CodecOptions::store_empty_chunks()`] defaults to [`Config::store_empty_chunks()`].
33///
34/// If `false`, empty chunks (where all elements match the fill value) will not be stored.
35/// This incurs a computational overhead as each element must be tested for equality to the fill value before a chunk is encoded.
36/// If `true`, the aforementioned test is skipped and empty chunks will be stored.
37/// Note that empty chunks must still be stored explicitly (e.g. with [`Array::store_chunk`](crate::array::Array::store_chunk)).
38///
39/// ### Codec Concurrent Target
40/// > default: [`std::thread::available_parallelism`]`()`
41///
42/// [`CodecOptions::concurrent_target()`] defaults to [`Config::codec_concurrent_target()`].
43///
44/// The default number of concurrent operations to target for codec encoding and decoding.
45/// Limiting concurrent operations is needed to reduce memory usage and improve performance.
46/// Concurrency is unconstrained if the concurrent target if set to zero.
47///
48/// Note that the default codec concurrent target can be overridden for any encode/decode operation.
49/// This is performed automatically for many array operations (see the [chunk concurrent minimum](#chunk-concurrent-minimum) option).
50///
51/// ### Chunk Concurrent Minimum
52/// > default: `4`
53///
54/// Array operations involving multiple chunks can tune the chunk and codec concurrency to improve performance/reduce memory usage.
55/// This option sets the preferred minimum chunk concurrency.
56/// The concurrency of internal codecs is adjusted to accomodate for the chunk concurrency in accordance with the concurrent target set in the [`CodecOptions`] parameter of an encode or decode method.
57///
58/// ### Experimental Partial Encoding
59/// > default: [`false`]
60///
61/// If `true`, [`Array::store_chunk_subset`](crate::array::Array::store_chunk_subset) and [`Array::store_array_subset`](crate::array::Array::store_array_subset) and variants can use partial encoding.
62/// This is relevant when using the sharding codec, as it enables inner chunks to be written without reading and writing entire shards.
63///
64/// This is an experimental feature for now until it has more comprehensively tested and support is added in the async API.
65///
66/// ## Metadata Options
67///
68/// ### Experimental Codec Store Metadata If Encode Only
69/// > default: [`false`]
70///
71/// Some codecs perform potentially irreversible transformations during encoding that decoders do not need to be aware of.
72/// If this option is `false`, experimental codecs with this behaviour will not write their metadata.
73/// This enables arrays to be consumed by other zarr3 implementations that do not support the experimental codec.
74/// Currently, this options only affects the `bitround` codec.
75///
76/// ### Metadata Convert Version
77/// > default: [`MetadataConvertVersion::Default`] (keep existing version)
78///
79/// Determines the Zarr version of metadata created with [`Array::metadata_opt`](crate::array::Array::metadata_opt) and [`Group::metadata_opt`](crate::group::Group::metadata_opt).
80/// These methods are used internally by the `store_metadata` and `store_metadata_opt` methods of [`crate::array::Array`] and [`crate::group::Group`].
81///
82/// ### Metadata Erase Version
83/// > default: [`MetadataEraseVersion::Default`] (erase existing version)
84///
85/// The default behaviour for the `erase_metadata` methods of [`crate::array::Array`] and [`crate::group::Group`].
86/// Determines whether to erase metadata of a specific Zarr version, the same version as the array/group was created with, or all known versions.
87///
88/// ### Include `zarrs` Metadata
89/// > default: [`true`]
90///
91/// [`ArrayMetadataOptions::include_zarrs_metadata`](crate::array::ArrayMetadataOptions::include_zarrs_metadata) defaults to [`Config::include_zarrs_metadata`].
92///
93/// If true, array metadata generated with [`Array::metadata_opt`](crate::array::Array::metadata_opt) (used internally by [`Array::store_metadata`](crate::array::Array::store_metadata)) includes the `zarrs` version and a link to its source code.
94/// For example:
95/// ```json
96/// "_zarrs": {
97///    "description": "This array was created with zarrs",
98///    "repository": "https://github.com/zarrs/zarrs",
99///    "version": "0.15.0"
100///  }
101/// ```
102///
103/// ### Codec Aliases
104/// > default: see [`ExtensionAliasesCodecV3::default`] and [`ExtensionAliasesCodecV2::default`].
105///
106/// The default codec `name`s used when serialising codecs, and recognised codec `name` aliases when deserialising codecs.
107/// Codec default `name`s and aliases can be modified at runtime.
108///
109/// Note that the [`NamedCodec`](crate::array::codec::NamedCodec) mechanism means that a serialised codec `name` can differ from the default `name`.
110/// By default, updating and storing the metadata of an array will NOT convert aliased codec names to the default codec name.
111/// This behaviour can be changed with the [convert aliased extension names](#convert-aliased-extension-names) configuration option.
112///
113/// The codec maps enable support for unstandardised codecs, such as:
114/// - codecs registered in the official [`zarr-extensions`](https://github.com/zarr-developers/zarr-extensions) repository that are compatible with `zarrs`,
115/// - `zarrs` experimental codecs with `name`s that have since changed, and
116/// - user-defined custom codecs.
117///
118/// If a codec is not present in the codec maps, the `name` will be inferred as the unique codec identifier.
119/// Codecs registered for that identifier work without any changes required for the codec maps.
120///
121/// ### Data Type Aliases
122/// > default: see [`ExtensionAliasesDataTypeV3::default`] and [`ExtensionAliasesDataTypeV2::default`].
123///
124/// These operate similarly to codec maps, but for data types.
125///
126/// ### Convert Aliased Extension Names
127/// > default: [`false`]
128///
129/// If true, then aliased extension names will be replaced by the standard name if metadata is resaved.
130/// This sets the default for [`crate::array::codec::CodecMetadataOptions`] (part of [`crate::array::ArrayMetadataOptions`])
131#[derive(Debug)]
132#[allow(clippy::struct_excessive_bools)]
133pub struct Config {
134    validate_checksums: bool,
135    store_empty_chunks: bool,
136    codec_concurrent_target: usize,
137    chunk_concurrent_minimum: usize,
138    experimental_codec_store_metadata_if_encode_only: bool,
139    metadata_convert_version: MetadataConvertVersion,
140    metadata_erase_version: MetadataEraseVersion,
141    include_zarrs_metadata: bool,
142    codec_aliases_v3: ExtensionAliasesCodecV3,
143    codec_aliases_v2: ExtensionAliasesCodecV2,
144    data_type_aliases_v3: ExtensionAliasesDataTypeV3,
145    data_type_aliases_v2: ExtensionAliasesDataTypeV2,
146    experimental_partial_encoding: bool,
147    convert_aliased_extension_names: bool,
148}
149
150#[allow(clippy::derivable_impls)]
151impl Default for Config {
152    fn default() -> Self {
153        Self {
154            validate_checksums: true,
155            store_empty_chunks: false,
156            codec_concurrent_target: rayon::current_num_threads(),
157            chunk_concurrent_minimum: 4,
158            experimental_codec_store_metadata_if_encode_only: false,
159            metadata_convert_version: MetadataConvertVersion::Default,
160            metadata_erase_version: MetadataEraseVersion::Default,
161            include_zarrs_metadata: true,
162            codec_aliases_v3: ExtensionAliasesCodecV3::default(),
163            codec_aliases_v2: ExtensionAliasesCodecV2::default(),
164            data_type_aliases_v3: ExtensionAliasesDataTypeV3::default(),
165            data_type_aliases_v2: ExtensionAliasesDataTypeV2::default(),
166            experimental_partial_encoding: false,
167            convert_aliased_extension_names: false,
168        }
169    }
170}
171
172impl Config {
173    /// Get the [validate checksums](#validate-checksums) configuration.
174    #[must_use]
175    pub fn validate_checksums(&self) -> bool {
176        self.validate_checksums
177    }
178
179    /// Set the [validate checksums](#validate-checksums) configuration.
180    pub fn set_validate_checksums(&mut self, validate_checksums: bool) -> &mut Self {
181        self.validate_checksums = validate_checksums;
182        self
183    }
184
185    /// Get the [store empty chunks](#store-empty-chunks) configuration.
186    #[must_use]
187    pub fn store_empty_chunks(&self) -> bool {
188        self.store_empty_chunks
189    }
190
191    /// Set the [store empty chunks](#store-empty-chunks) configuration.
192    pub fn set_store_empty_chunks(&mut self, store_empty_chunks: bool) -> &mut Self {
193        self.store_empty_chunks = store_empty_chunks;
194        self
195    }
196
197    /// Get the [codec concurrent target](#codec-concurrent-target) configuration.
198    #[must_use]
199    pub fn codec_concurrent_target(&self) -> usize {
200        self.codec_concurrent_target
201    }
202
203    /// Set the [codec concurrent target](#codec-concurrent-target) configuration.
204    pub fn set_codec_concurrent_target(&mut self, concurrent_target: usize) -> &mut Self {
205        self.codec_concurrent_target = concurrent_target;
206        self
207    }
208
209    /// Get the [chunk concurrent minimum](#chunk-concurrent-minimum) configuration.
210    #[must_use]
211    pub fn chunk_concurrent_minimum(&self) -> usize {
212        self.chunk_concurrent_minimum
213    }
214
215    /// Set the [chunk concurrent minimum](#chunk-concurrent-minimum) configuration.
216    pub fn set_chunk_concurrent_minimum(&mut self, concurrent_minimum: usize) -> &mut Self {
217        self.chunk_concurrent_minimum = concurrent_minimum;
218        self
219    }
220
221    /// Get the [experimental codec store metadata if encode only](#experimental-codec-store-metadata-if-encode-only) configuration.
222    #[must_use]
223    pub fn experimental_codec_store_metadata_if_encode_only(&self) -> bool {
224        self.experimental_codec_store_metadata_if_encode_only
225    }
226
227    /// Set the [experimental codec store metadata if encode only](#experimental-codec-store-metadata-if-encode-only) configuration.
228    pub fn set_experimental_codec_store_metadata_if_encode_only(
229        &mut self,
230        enabled: bool,
231    ) -> &mut Self {
232        self.experimental_codec_store_metadata_if_encode_only = enabled;
233        self
234    }
235
236    /// Get the [metadata convert version](#metadata-convert-version) configuration.
237    #[must_use]
238    pub fn metadata_convert_version(&self) -> MetadataConvertVersion {
239        self.metadata_convert_version
240    }
241
242    /// Set the [metadata convert version](#metadata-convert-version) configuration.
243    pub fn set_metadata_convert_version(&mut self, version: MetadataConvertVersion) -> &mut Self {
244        self.metadata_convert_version = version;
245        self
246    }
247
248    /// Get the [metadata erase version](#metadata-erase-version) configuration.
249    #[must_use]
250    pub fn metadata_erase_version(&self) -> MetadataEraseVersion {
251        self.metadata_erase_version
252    }
253
254    /// Set the [metadata erase version](#metadata-erase-version) configuration.
255    pub fn set_metadata_erase_version(&mut self, version: MetadataEraseVersion) -> &mut Self {
256        self.metadata_erase_version = version;
257        self
258    }
259
260    /// Get the [include zarrs metadata](#include-zarrs-metadata) configuration.
261    #[must_use]
262    pub fn include_zarrs_metadata(&self) -> bool {
263        self.include_zarrs_metadata
264    }
265
266    /// Set the [include zarrs metadata](#include-zarrs-metadata) configuration.
267    pub fn set_include_zarrs_metadata(&mut self, include_zarrs_metadata: bool) -> &mut Self {
268        self.include_zarrs_metadata = include_zarrs_metadata;
269        self
270    }
271
272    /// Get the Zarr V3 [codec aliases](#codec-aliases) configuration.
273    #[must_use]
274    pub fn codec_aliases_v3(&self) -> &ExtensionAliasesCodecV3 {
275        &self.codec_aliases_v3
276    }
277
278    /// Get a mutable reference to the Zarr V3 [codec aliases](#codec-aliases) configuration.
279    pub fn codec_aliases_v3_mut(&mut self) -> &mut ExtensionAliasesCodecV3 {
280        &mut self.codec_aliases_v3
281    }
282
283    /// Get the Zarr V3 [data type aliases](#data-type-aliases) configuration.
284    #[must_use]
285    pub fn data_type_aliases_v3(&self) -> &ExtensionAliasesDataTypeV3 {
286        &self.data_type_aliases_v3
287    }
288
289    /// Get a mutable reference to the Zarr V3 [data type aliases](#data-type-aliases) configuration.
290    pub fn data_type_aliases_v3_mut(&mut self) -> &mut ExtensionAliasesDataTypeV3 {
291        &mut self.data_type_aliases_v3
292    }
293
294    /// Get the Zarr V2 [codec aliases](#codec-aliases) configuration.
295    #[must_use]
296    pub fn codec_aliases_v2(&self) -> &ExtensionAliasesCodecV2 {
297        &self.codec_aliases_v2
298    }
299
300    /// Get a mutable reference to the Zarr V2 [codec aliases](#codec-aliases) configuration.
301    pub fn codec_aliases_v2_mut(&mut self) -> &mut ExtensionAliasesCodecV2 {
302        &mut self.codec_aliases_v2
303    }
304
305    /// Get the Zarr V2 [data type aliases](#data-type-aliases) configuration.
306    #[must_use]
307    pub fn data_type_aliases_v2(&self) -> &ExtensionAliasesDataTypeV2 {
308        &self.data_type_aliases_v2
309    }
310
311    /// Get a mutable reference to the Zarr V2 [data type aliases](#data-type-aliases) configuration.
312    pub fn data_type_aliases_v2_mut(&mut self) -> &mut ExtensionAliasesDataTypeV2 {
313        &mut self.data_type_aliases_v2
314    }
315
316    /// Get the [experimental partial encoding](#experimental-partial-encoding) configuration.
317    #[must_use]
318    pub fn experimental_partial_encoding(&self) -> bool {
319        self.experimental_partial_encoding
320    }
321
322    /// Set the [experimental partial encoding](#experimental-partial-encoding) configuration.
323    pub fn set_experimental_partial_encoding(
324        &mut self,
325        experimental_partial_encoding: bool,
326    ) -> &mut Self {
327        self.experimental_partial_encoding = experimental_partial_encoding;
328        self
329    }
330
331    /// Set the [convert aliased extension names](#convert-aliased-extension-names) configuration.
332    #[must_use]
333    pub fn convert_aliased_extension_names(&self) -> bool {
334        self.convert_aliased_extension_names
335    }
336
337    /// Set the [convert aliased extension names](#convert-aliased-extension-names) configuration.
338    pub fn set_convert_aliased_extension_names(
339        &mut self,
340        convert_aliased_extension_names: bool,
341    ) -> &mut Self {
342        self.convert_aliased_extension_names = convert_aliased_extension_names;
343        self
344    }
345}
346
347static CONFIG: LazyLock<RwLock<Config>> = LazyLock::new(|| RwLock::new(Config::default()));
348
349/// Returns a reference to the global `zarrs` configuration.
350///
351/// # Panics
352/// This function panics if the underlying lock has been poisoned and might panic if the global config is already held by the current thread.
353pub fn global_config() -> RwLockReadGuard<'static, Config> {
354    CONFIG.read().unwrap()
355}
356
357/// Returns a mutable reference to the global `zarrs` configuration.
358///
359/// # Panics
360/// This function panics if the underlying lock has been poisoned and might panic if the global config is already held by the current thread.
361pub fn global_config_mut() -> RwLockWriteGuard<'static, Config> {
362    CONFIG.write().unwrap()
363}
364
/// The metadata version to retrieve.
///
/// Used with [`crate::array::Array::open_opt`], [`crate::group::Group::open_opt`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MetadataRetrieveVersion {
    /// Either Zarr V3 or V2. V3 is prioritised over V2 if found.
    Default,
    /// Zarr V3.
    V3,
    /// Zarr V2.
    V2,
}
376
/// Version options for [`Array::store_metadata`](crate::array::Array::store_metadata) and [`Group::store_metadata`](crate::group::Group::store_metadata), and their async variants.
///
/// Variants can be compared with `==`, e.g. to inspect the value returned by a configuration getter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MetadataConvertVersion {
    /// Write the same version as the input metadata.
    Default,
    /// Write Zarr V3 metadata. Zarr V2 metadata will not be automatically removed if it exists.
    V3,
}
385
/// Version options for [`Array::erase_metadata`](crate::array::Array::erase_metadata) and [`Group::erase_metadata`](crate::group::Group::erase_metadata), and their async variants.
///
/// Variants can be compared with `==`, e.g. to inspect the value returned by a configuration getter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MetadataEraseVersion {
    /// Erase the same version as the input metadata.
    Default,
    /// Erase all metadata.
    All,
    /// Erase Zarr V3 metadata.
    V3,
    /// Erase Zarr V2 metadata.
    V2,
}
398
#[cfg(test)]
mod tests {
    use super::*;

    /// Toggling `validate_checksums` through the global config is observable and reversible.
    #[test]
    fn config_validate_checksums() {
        // The global config is shared by every test in this process, and the test
        // harness may run tests in parallel. Perform the whole toggle under a single
        // write guard so that no other test can observe the temporary `false` value.
        // Assertions are deferred until the guard is released so that a failure
        // cannot poison the global lock.
        let (initial, toggled) = {
            let mut config = global_config_mut();
            let initial = config.validate_checksums();
            config.set_validate_checksums(false);
            let toggled = config.validate_checksums();
            config.set_validate_checksums(initial);
            (initial, toggled)
        };

        assert!(initial);
        assert!(!toggled);
        // The restored value is visible through a fresh read guard.
        assert!(global_config().validate_checksums());
    }
}
410}