1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
use crate::Error;
/// Compression algorithms supported by blosc2.
///
/// The library itself always uses some "backend" compression algorithm, such as `blosclz`, `lz4`,
/// `zlib`, or `zstd`.
///
/// Each variant's discriminant is the matching `BLOSC_*` codec constant from `blosc2_sys`, so a
/// variant can be cast directly to the numeric code stored in the C parameter structs.
/// The `Zlib` and `Zstd` variants only exist when the cargo feature of the same name is enabled.
///
/// The enum is fieldless and therefore `Copy`, like [`SplitMode`].
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum CompressAlgo {
    /// Blosc's own compression algorithm, `blosclz`.
    Blosclz = blosc2_sys::BLOSC_BLOSCLZ as _,
    /// LZ4 compression algorithm.
    Lz4 = blosc2_sys::BLOSC_LZ4 as _,
    /// LZ4HC compression algorithm.
    Lz4hc = blosc2_sys::BLOSC_LZ4HC as _,
    /// Zlib compression algorithm.
    #[cfg(feature = "zlib")]
    Zlib = blosc2_sys::BLOSC_ZLIB as _,
    /// Zstandard compression algorithm.
    #[cfg(feature = "zstd")]
    Zstd = blosc2_sys::BLOSC_ZSTD as _,
}
/// Pre-compression filters.
///
/// A filter transforms the data before it is handed to the compressor.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum Filter {
    /// Shuffle at byte granularity.
    ///
    /// For an array of bytes holding N elements of a type that is S bytes wide, the bytes are
    /// rearranged from
    /// `[1_1, 1_2, ..., 1_S, 2_1, 2_2, ..., 2_S, ..., N_1, N_2, ..., N_S]` to
    /// `[1_1, 2_1, ..., N_1, 1_2, 2_2, ..., N_2, ..., 1_S, 2_S, ..., N_S]`,
    /// where `i_j` denotes the j-th byte of the i-th element.
    ByteShuffle,
    /// Shuffle at bit granularity.
    ///
    /// Works like `ByteShuffle`, except that individual bits are rearranged rather than bytes.
    BitShuffle,
    /// Delta encoding.
    ///
    /// The data is stored as the differences between consecutive elements.
    ///
    /// The C library appears to have known bugs in this filter, so avoiding it is recommended
    /// until they are resolved.
    /// See for example <https://github.com/Blosc/c-blosc2/issues/701>
    Delta,
    /// Precision truncation for floating point data.
    ///
    /// Lowers the precision of floating point numbers by zeroing their least significant bits.
    ///
    /// Only floating point types (e.g., `f32`, `f64`) are supported. The library cannot enforce
    /// this; the only check performed is that the typesize is 4 or 8 bytes.
    ///
    /// The C library appears to misbehave when this filter is combined with `ByteShuffle`.
    TruncPrecision {
        /// How many bits to keep.
        ///
        /// A positive value sets the absolute number of precision bits, while a negative value
        /// reduces the precision bits by that amount (similar to the Python slicing convention).
        prec_bits: i8,
    },
}
/// Split mode selection for encoders.
///
/// Every variant's discriminant is the `BLOSC_*_SPLIT` constant of the same meaning from
/// `blosc2_sys`, so a variant can be cast straight to the numeric value the C library expects.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum SplitMode {
    /// Corresponds to `BLOSC_ALWAYS_SPLIT` in the C library.
    Always = blosc2_sys::BLOSC_ALWAYS_SPLIT as _,
    /// Corresponds to `BLOSC_NEVER_SPLIT` in the C library.
    Never = blosc2_sys::BLOSC_NEVER_SPLIT as _,
    /// Corresponds to `BLOSC_AUTO_SPLIT` in the C library.
    Auto = blosc2_sys::BLOSC_AUTO_SPLIT as _,
    /// Corresponds to `BLOSC_FORWARD_COMPAT_SPLIT` in the C library.
    ForwardCompat = blosc2_sys::BLOSC_FORWARD_COMPAT_SPLIT as _,
}
/// Compression parameters, used by [`Encoder`](crate::chunk::Encoder), [`SChunk`](crate::chunk::SChunk) and [`Ndarray`](crate::nd::Ndarray).
///
/// This is a newtype around the raw `blosc2_cparams` struct from `blosc2_sys`. The inner value is
/// only visible inside this crate; users read and modify individual settings through the
/// accessor methods on this type.
#[derive(Clone)]
pub struct CParams(pub(crate) blosc2_sys::blosc2_cparams);
impl Default for CParams {
fn default() -> Self {
Self(unsafe { blosc2_sys::blosc2_get_blosc2_cparams_defaults() })
}
}
impl CParams {
    /// Number of filter slots in the underlying C parameter struct.
    const MAX_FILTERS: usize = 6;

    /// Highest compression level accepted by [`CParams::clevel`].
    const MAX_CLEVEL: u32 = 9;

    /// Set the compressor to use.
    ///
    /// By default, the compressor is set to `Blosclz`.
    pub fn compressor(&mut self, compressor: CompressAlgo) -> &mut Self {
        // The enum discriminants are the C codec codes, so a plain cast is enough.
        self.0.compcode = compressor as _;
        self
    }

    /// Get the compressor currently set in the parameters.
    ///
    /// # Panics
    ///
    /// Panics if the stored codec code does not match any [`CompressAlgo`] variant available
    /// in this build (for example a codec whose cargo feature is disabled).
    pub fn get_compressor(&self) -> CompressAlgo {
        match self.0.compcode as _ {
            blosc2_sys::BLOSC_BLOSCLZ => CompressAlgo::Blosclz,
            blosc2_sys::BLOSC_LZ4 => CompressAlgo::Lz4,
            blosc2_sys::BLOSC_LZ4HC => CompressAlgo::Lz4hc,
            #[cfg(feature = "zlib")]
            blosc2_sys::BLOSC_ZLIB => CompressAlgo::Zlib,
            #[cfg(feature = "zstd")]
            blosc2_sys::BLOSC_ZSTD => CompressAlgo::Zstd,
            unknown_code => panic!("Unknown compressor code: {unknown_code}"),
        }
    }

    /// Set the compression level, in range [0, 9].
    ///
    /// Values above 9 are clamped to 9. Without the clamp, the narrowing cast into the C field
    /// could wrap a large value around to a small one (the field is presumably a `u8`, in which
    /// case 256 would have become 0).
    ///
    /// By default, the compression level is set to 5.
    pub fn clevel(&mut self, clevel: u32) -> &mut Self {
        self.0.clevel = clevel.min(Self::MAX_CLEVEL) as _;
        self
    }

    /// Get the compression level currently set in the parameters.
    pub fn get_clevel(&self) -> u32 {
        self.0.clevel as u32
    }

    /// Set the typesize of the data to compress (in bytes).
    ///
    /// The typesize must be in range [1, 255].
    ///
    /// By default, the typesize is set to 8 bytes.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidParam`] if `typesize` is zero or larger than
    /// `BLOSC_MAX_TYPESIZE`; the parameters are left unchanged in that case.
    pub fn typesize(&mut self, typesize: usize) -> Result<&mut Self, Error> {
        if !(1..=blosc2_sys::BLOSC_MAX_TYPESIZE as usize).contains(&typesize) {
            crate::trace!(
                "Typesize {} is outside the valid range [1, {}]",
                typesize,
                blosc2_sys::BLOSC_MAX_TYPESIZE
            );
            return Err(Error::InvalidParam);
        }
        // The range check above guarantees the value fits the C field.
        self.0.typesize = typesize as _;
        Ok(self)
    }

    /// Get the typesize currently set in the parameters.
    pub fn get_typesize(&self) -> usize {
        debug_assert!(self.0.typesize > 0);
        self.0.typesize as usize
    }

    /// Set the number of threads to use for compression.
    ///
    /// The value is clamped to `[1, i16::MAX]`: zero is raised to one, and values that do not
    /// fit the 16-bit C field saturate instead of wrapping around.
    ///
    /// By default, the number of threads is set to 1.
    pub fn nthreads(&mut self, nthreads: usize) -> &mut Self {
        self.0.nthreads = nthreads.clamp(1, i16::MAX as usize) as i16;
        self
    }

    /// Get the number of threads currently set in the parameters.
    pub fn get_nthreads(&self) -> usize {
        self.0.nthreads as usize
    }

    /// Set the block size for compression.
    ///
    /// `None` means automatic block size. `Some(0)` is treated as `Some(1)`, and sizes larger
    /// than `i32::MAX` saturate at `i32::MAX`.
    ///
    /// By default, an automatic block size is used.
    pub fn blocksize(&mut self, blocksize: Option<usize>) -> &mut Self {
        self.0.blocksize = match blocksize {
            None => 0, // auto
            // A stored zero means "auto", so an explicit zero is bumped to the smallest size.
            Some(0) => 1,
            // Saturate rather than wrap; a wrapped value could turn zero or negative and would
            // then read back as "auto". Assumes the C field is a 32-bit signed integer.
            Some(blocksize) => blocksize.min(i32::MAX as usize) as _,
        };
        self
    }

    /// Get the block size currently set in the parameters.
    ///
    /// `None` means automatic block size.
    pub fn get_blocksize(&self) -> Option<usize> {
        (self.0.blocksize > 0).then_some(self.0.blocksize as usize)
    }

    /// Set the split mode for the encoder.
    ///
    /// By default, the split mode is set to `ForwardCompat`.
    pub fn splitmode(&mut self, splitmode: SplitMode) -> &mut Self {
        self.0.splitmode = splitmode as _;
        self
    }

    /// Get the split mode currently set in the parameters.
    ///
    /// # Panics
    ///
    /// Panics if the stored value does not match any [`SplitMode`] variant.
    pub fn get_splitmode(&self) -> SplitMode {
        match self.0.splitmode as _ {
            blosc2_sys::BLOSC_ALWAYS_SPLIT => SplitMode::Always,
            blosc2_sys::BLOSC_NEVER_SPLIT => SplitMode::Never,
            blosc2_sys::BLOSC_AUTO_SPLIT => SplitMode::Auto,
            blosc2_sys::BLOSC_FORWARD_COMPAT_SPLIT => SplitMode::ForwardCompat,
            unknown_mode => panic!("Unknown split mode: {unknown_mode}"),
        }
    }

    /// Set the filters to apply before compression.
    ///
    /// The maximum number of filters is 6. Any previously configured filters are replaced;
    /// unused slots are reset to "no filter".
    ///
    /// A warning is written to standard error when a combination known to be problematic in
    /// the C library is requested (the `Delta` filter, or `TruncPrecision` together with
    /// `ByteShuffle`). The filters are still applied.
    ///
    /// By default, a single `ByteShuffle` filter is applied.
    ///
    /// # Errors
    ///
    /// Returns [`Error::InvalidParam`] if more than 6 filters are given; the parameters are
    /// left unchanged in that case.
    pub fn filters(&mut self, filters: &[Filter]) -> Result<&mut Self, Error> {
        if filters.len() > Self::MAX_FILTERS {
            crate::trace!("Too many filters, maximum is {}", Self::MAX_FILTERS);
            return Err(Error::InvalidParam);
        }

        let has_trunc_precision = filters
            .iter()
            .any(|f| matches!(f, Filter::TruncPrecision { .. }));
        let has_byte_shuffle = filters.iter().any(|f| matches!(f, Filter::ByteShuffle));
        if has_trunc_precision && has_byte_shuffle {
            // Diagnostics belong on stderr so they do not pollute the program's stdout.
            eprintln!(
                "Warning, using both the trunc precision filter and byte shuffle seems buggy!"
            );
        }

        // Reset every slot first so filters from a previous call do not linger.
        self.0.filters = [blosc2_sys::BLOSC_NOFILTER as _; Self::MAX_FILTERS];
        self.0.filters_meta = [0; Self::MAX_FILTERS];

        let slots = self
            .0
            .filters
            .iter_mut()
            .zip(self.0.filters_meta.iter_mut());
        for (filter, (code_slot, meta_slot)) in filters.iter().zip(slots) {
            let (code, meta) = match filter {
                Filter::ByteShuffle => (blosc2_sys::BLOSC_SHUFFLE, 0),
                Filter::BitShuffle => (blosc2_sys::BLOSC_BITSHUFFLE, 0),
                Filter::Delta => {
                    // TODO: https://github.com/Blosc/c-blosc2/issues/701
                    eprintln!("Warning, the delta filter seems buggy in c-blosc2!");
                    (blosc2_sys::BLOSC_DELTA, 0)
                }
                Filter::TruncPrecision { prec_bits } => {
                    // The signed bit count is stored as its raw byte; `get_filters`
                    // casts it back to `i8`.
                    (blosc2_sys::BLOSC_TRUNC_PREC, *prec_bits as u8)
                }
            };
            *code_slot = code as _;
            *meta_slot = meta;
        }
        Ok(self)
    }

    /// Get the filters currently set in the parameters.
    ///
    /// Empty ("no filter") slots are skipped.
    ///
    /// # Panics
    ///
    /// The returned iterator panics when it reaches a slot holding a filter code that does not
    /// match any [`Filter`] variant.
    pub fn get_filters(&self) -> impl Iterator<Item = Filter> {
        // Copy both arrays so the iterator owns its data.
        let filters = self.0.filters;
        let filters_meta = self.0.filters_meta;
        filters
            .into_iter()
            .zip(filters_meta)
            .filter_map(|(f, meta)| {
                Some(match f as _ {
                    blosc2_sys::BLOSC_NOFILTER => return None,
                    blosc2_sys::BLOSC_SHUFFLE => Filter::ByteShuffle,
                    blosc2_sys::BLOSC_BITSHUFFLE => Filter::BitShuffle,
                    blosc2_sys::BLOSC_DELTA => Filter::Delta,
                    blosc2_sys::BLOSC_TRUNC_PREC => Filter::TruncPrecision {
                        prec_bits: meta as _,
                    },
                    unknown_filter => panic!("Unknown filter code: {unknown_filter}"),
                })
            })
    }
}
impl std::fmt::Debug for CParams {
    /// Formats the parameters as a struct of the values returned by the public getters,
    /// rather than dumping the raw C struct.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("CParams");
        out.field("compressor", &self.get_compressor());
        out.field("clevel", &self.get_clevel());
        out.field("typesize", &self.get_typesize());
        out.field("nthreads", &self.get_nthreads());
        out.field("blocksize", &self.get_blocksize());
        out.field("splitmode", &self.get_splitmode());
        out.field("filters", &self.get_filters().collect::<Vec<_>>());
        out.finish()
    }
}
/// Decompression parameters, used by [`Decoder`](crate::chunk::Decoder), [`SChunk`](crate::chunk::SChunk) and [`Ndarray`](crate::nd::Ndarray).
///
/// This is a newtype around the raw `blosc2_dparams` struct from `blosc2_sys`. The inner value is
/// only visible inside this crate; users read and modify settings through the accessor methods
/// on this type.
#[derive(Clone)]
pub struct DParams(pub(crate) blosc2_sys::blosc2_dparams);
impl Default for DParams {
fn default() -> Self {
Self(unsafe { blosc2_sys::blosc2_get_blosc2_dparams_defaults() })
}
}
impl DParams {
    /// Set the number of threads to use for decompression.
    ///
    /// The value is clamped to `[1, i16::MAX]`: zero is raised to one, and values that do not
    /// fit the 16-bit C field saturate instead of wrapping around.
    ///
    /// By default, the number of threads is set to 1.
    pub fn nthreads(&mut self, nthreads: usize) -> &mut Self {
        self.0.nthreads = nthreads.clamp(1, i16::MAX as usize) as i16;
        self
    }

    /// Get the number of threads currently set in the parameters.
    pub fn get_nthreads(&self) -> usize {
        self.0.nthreads as usize
    }
}
impl std::fmt::Debug for DParams {
    /// Formats the parameters as a struct of the values returned by the public getters,
    /// rather than dumping the raw C struct.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("DParams");
        out.field("nthreads", &self.get_nthreads());
        out.finish()
    }
}