1use crate::attribute::Attributes;
7use crate::datatype::Datatype;
8use crate::error::{Hdf5Error, Result};
9use serde::{Deserialize, Serialize};
10
/// Storage layout strategy for a dataset's raw data.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum LayoutType {
    /// Data stored in one contiguous region (the default; see `DatasetProperties::new`).
    Contiguous,
    /// Data split into fixed-size chunks; set automatically by `with_chunks`.
    Chunked,
    /// Compact layout variant (not otherwise constrained by this module).
    Compact,
}
21
/// Compression filter applied to a dataset's stored data.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CompressionFilter {
    /// No compression (the default; see `DatasetProperties::new`).
    None,
    /// Gzip/DEFLATE compression.
    Gzip {
        /// Compression level; `with_gzip` clamps this to the 1-9 range.
        level: u8,
    },
    /// LZF compression.
    Lzf,
    /// SZIP compression.
    Szip,
}
37
/// Creation-time properties of a dataset: layout, chunking,
/// compression, and fill value. Built fluently via the `with_*` methods.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetProperties {
    /// Storage layout; `Contiguous` by default.
    layout: LayoutType,
    /// Per-dimension chunk sizes; populated by `with_chunks`.
    chunk_dims: Option<Vec<usize>>,
    /// Compression filter; `None` by default.
    compression: CompressionFilter,
    /// Raw bytes used as the fill value, if configured.
    fill_value: Option<Vec<u8>>,
}
50
51impl DatasetProperties {
52 pub fn new() -> Self {
54 Self {
55 layout: LayoutType::Contiguous,
56 chunk_dims: None,
57 compression: CompressionFilter::None,
58 fill_value: None,
59 }
60 }
61
62 pub fn with_layout(mut self, layout: LayoutType) -> Self {
64 self.layout = layout;
65 self
66 }
67
68 pub fn with_chunks(mut self, chunk_dims: Vec<usize>) -> Self {
70 self.layout = LayoutType::Chunked;
71 self.chunk_dims = Some(chunk_dims);
72 self
73 }
74
75 pub fn with_compression(mut self, compression: CompressionFilter) -> Self {
77 self.compression = compression;
78 self
79 }
80
81 pub fn with_gzip(mut self, level: u8) -> Self {
83 let level = level.clamp(1, 9);
84 self.compression = CompressionFilter::Gzip { level };
85 self
86 }
87
88 pub fn with_fill_value(mut self, fill_value: Vec<u8>) -> Self {
90 self.fill_value = Some(fill_value);
91 self
92 }
93
94 pub fn layout(&self) -> LayoutType {
96 self.layout
97 }
98
99 pub fn chunk_dims(&self) -> Option<&[usize]> {
101 self.chunk_dims.as_deref()
102 }
103
104 pub fn compression(&self) -> CompressionFilter {
106 self.compression
107 }
108
109 pub fn fill_value(&self) -> Option<&[u8]> {
111 self.fill_value.as_deref()
112 }
113
114 pub fn validate_chunks(&self, dims: &[usize]) -> Result<()> {
116 if let Some(chunks) = &self.chunk_dims {
117 if chunks.len() != dims.len() {
118 return Err(Hdf5Error::InvalidChunkSize(format!(
119 "Chunk dimensions ({}) must match dataset dimensions ({})",
120 chunks.len(),
121 dims.len()
122 )));
123 }
124
125 for (i, (&chunk_size, &dim_size)) in chunks.iter().zip(dims.iter()).enumerate() {
126 if chunk_size == 0 {
127 return Err(Hdf5Error::InvalidChunkSize(format!(
128 "Chunk size at dimension {} cannot be zero",
129 i
130 )));
131 }
132 if chunk_size > dim_size {
133 return Err(Hdf5Error::InvalidChunkSize(format!(
134 "Chunk size ({}) at dimension {} exceeds dataset size ({})",
135 chunk_size, i, dim_size
136 )));
137 }
138 }
139 }
140 Ok(())
141 }
142}
143
144impl Default for DatasetProperties {
145 fn default() -> Self {
146 Self::new()
147 }
148}
149
/// An n-dimensional dataset: a named, typed array with creation
/// properties, attributes, and optionally loaded raw data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Dataset {
    /// Dataset name (final path component).
    name: String,
    /// Full path of the dataset within the file hierarchy.
    path: String,
    /// Element datatype.
    datatype: Datatype,
    /// Extent of each dimension; validated non-empty and non-zero in `new`.
    dims: Vec<usize>,
    /// Creation properties (layout, chunking, compression, fill value).
    properties: DatasetProperties,
    /// Attached attributes.
    attributes: Attributes,
    /// Raw element bytes, if loaded. Excluded from serialization,
    /// so a deserialized `Dataset` always starts with `data == None`.
    #[serde(skip)]
    data: Option<Vec<u8>>,
}
169
170impl Dataset {
171 pub fn new(
173 name: String,
174 path: String,
175 datatype: Datatype,
176 dims: Vec<usize>,
177 properties: DatasetProperties,
178 ) -> Result<Self> {
179 if dims.is_empty() {
181 return Err(Hdf5Error::invalid_dimensions(
182 "Dataset must have at least one dimension",
183 ));
184 }
185
186 for (i, &dim) in dims.iter().enumerate() {
187 if dim == 0 {
188 return Err(Hdf5Error::invalid_dimensions(format!(
189 "Dimension {} cannot be zero",
190 i
191 )));
192 }
193 }
194
195 properties.validate_chunks(&dims)?;
197
198 Ok(Self {
199 name,
200 path,
201 datatype,
202 dims,
203 properties,
204 attributes: Attributes::new(),
205 data: None,
206 })
207 }
208
209 pub fn simple(
211 name: String,
212 path: String,
213 datatype: Datatype,
214 dims: Vec<usize>,
215 ) -> Result<Self> {
216 Self::new(name, path, datatype, dims, DatasetProperties::new())
217 }
218
219 pub fn name(&self) -> &str {
221 &self.name
222 }
223
224 pub fn path(&self) -> &str {
226 &self.path
227 }
228
229 pub fn datatype(&self) -> &Datatype {
231 &self.datatype
232 }
233
234 pub fn dims(&self) -> &[usize] {
236 &self.dims
237 }
238
239 pub fn ndims(&self) -> usize {
241 self.dims.len()
242 }
243
244 pub fn len(&self) -> usize {
246 self.dims.iter().product()
247 }
248
249 pub fn is_empty(&self) -> bool {
251 self.len() == 0
252 }
253
254 pub fn size_in_bytes(&self) -> usize {
256 self.len() * self.datatype.size()
257 }
258
259 pub fn properties(&self) -> &DatasetProperties {
261 &self.properties
262 }
263
264 pub fn attributes(&self) -> &Attributes {
266 &self.attributes
267 }
268
269 pub fn attributes_mut(&mut self) -> &mut Attributes {
271 &mut self.attributes
272 }
273
274 pub fn set_data(&mut self, data: Vec<u8>) -> Result<()> {
276 let expected_size = self.size_in_bytes();
277 if data.len() != expected_size {
278 return Err(Hdf5Error::InvalidSize(format!(
279 "Data size ({}) does not match expected size ({})",
280 data.len(),
281 expected_size
282 )));
283 }
284 self.data = Some(data);
285 Ok(())
286 }
287
288 pub fn data(&self) -> Option<&[u8]> {
290 self.data.as_deref()
291 }
292
293 pub fn take_data(&mut self) -> Option<Vec<u8>> {
295 self.data.take()
296 }
297
298 pub fn validate_slice(&self, start: &[usize], count: &[usize]) -> Result<()> {
300 if start.len() != self.ndims() {
301 return Err(Hdf5Error::invalid_dimensions(format!(
302 "Start dimensions ({}) must match dataset dimensions ({})",
303 start.len(),
304 self.ndims()
305 )));
306 }
307
308 if count.len() != self.ndims() {
309 return Err(Hdf5Error::invalid_dimensions(format!(
310 "Count dimensions ({}) must match dataset dimensions ({})",
311 count.len(),
312 self.ndims()
313 )));
314 }
315
316 for (i, (&s, &c)) in start.iter().zip(count.iter()).enumerate() {
317 if s + c > self.dims[i] {
318 return Err(Hdf5Error::OutOfBounds {
319 index: s + c,
320 size: self.dims[i],
321 });
322 }
323 }
324
325 Ok(())
326 }
327
328 pub fn slice_size(&self, count: &[usize]) -> usize {
330 count.iter().product()
331 }
332
333 pub fn slice_size_bytes(&self, count: &[usize]) -> usize {
335 self.slice_size(count) * self.datatype.size()
336 }
337}
338
339impl Dataset {
341 pub fn from_1d(name: String, path: String, datatype: Datatype, size: usize) -> Result<Self> {
343 Self::simple(name, path, datatype, vec![size])
344 }
345
346 pub fn from_2d(
348 name: String,
349 path: String,
350 datatype: Datatype,
351 rows: usize,
352 cols: usize,
353 ) -> Result<Self> {
354 Self::simple(name, path, datatype, vec![rows, cols])
355 }
356
357 pub fn from_3d(
359 name: String,
360 path: String,
361 datatype: Datatype,
362 depth: usize,
363 rows: usize,
364 cols: usize,
365 ) -> Result<Self> {
366 Self::simple(name, path, datatype, vec![depth, rows, cols])
367 }
368
369 pub fn chunked(
371 name: String,
372 path: String,
373 datatype: Datatype,
374 dims: Vec<usize>,
375 chunk_dims: Vec<usize>,
376 ) -> Result<Self> {
377 let properties = DatasetProperties::new().with_chunks(chunk_dims);
378 Self::new(name, path, datatype, dims, properties)
379 }
380
381 pub fn compressed(
383 name: String,
384 path: String,
385 datatype: Datatype,
386 dims: Vec<usize>,
387 chunk_dims: Vec<usize>,
388 compression: CompressionFilter,
389 ) -> Result<Self> {
390 let properties = DatasetProperties::new()
391 .with_chunks(chunk_dims)
392 .with_compression(compression);
393 Self::new(name, path, datatype, dims, properties)
394 }
395}
396
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_dataset_properties() {
        // Defaults: contiguous, unchunked, uncompressed.
        let defaults = DatasetProperties::new();
        assert_eq!(defaults.layout(), LayoutType::Contiguous);
        assert!(defaults.chunk_dims().is_none());
        assert_eq!(defaults.compression(), CompressionFilter::None);

        // Builder chain: chunking flips the layout, gzip sets the filter.
        let configured = DatasetProperties::new()
            .with_chunks(vec![10, 10])
            .with_gzip(6);
        assert_eq!(configured.layout(), LayoutType::Chunked);
        assert_eq!(configured.chunk_dims(), Some(&[10, 10][..]));
        assert_eq!(
            configured.compression(),
            CompressionFilter::Gzip { level: 6 }
        );
    }

    #[test]
    fn test_dataset_creation() {
        let ds = Dataset::simple(
            "data".to_string(),
            "/data".to_string(),
            Datatype::Float32,
            vec![100, 200],
        )
        .expect("Failed to create dataset");

        assert_eq!(ds.name(), "data");
        assert_eq!(ds.path(), "/data");
        assert_eq!(ds.datatype(), &Datatype::Float32);
        assert_eq!(ds.dims(), &[100, 200]);
        assert_eq!(ds.ndims(), 2);
        assert_eq!(ds.len(), 20000);
        // 20_000 Float32 elements * 4 bytes each.
        assert_eq!(ds.size_in_bytes(), 80000);
    }

    #[test]
    fn test_dataset_1d() {
        let ds = Dataset::from_1d(
            "data".to_string(),
            "/data".to_string(),
            Datatype::Int32,
            100,
        )
        .expect("Failed to create dataset");

        assert_eq!(ds.dims(), &[100]);
        assert_eq!(ds.len(), 100);
    }

    #[test]
    fn test_dataset_2d() {
        let ds = Dataset::from_2d(
            "data".to_string(),
            "/data".to_string(),
            Datatype::Float64,
            50,
            100,
        )
        .expect("Failed to create dataset");

        assert_eq!(ds.dims(), &[50, 100]);
        assert_eq!(ds.len(), 5000);
    }

    #[test]
    fn test_dataset_3d() {
        let ds = Dataset::from_3d(
            "data".to_string(),
            "/data".to_string(),
            Datatype::UInt8,
            10,
            20,
            30,
        )
        .expect("Failed to create dataset");

        assert_eq!(ds.dims(), &[10, 20, 30]);
        assert_eq!(ds.len(), 6000);
    }

    #[test]
    fn test_dataset_chunked() {
        let ds = Dataset::chunked(
            "data".to_string(),
            "/data".to_string(),
            Datatype::Float32,
            vec![100, 200],
            vec![10, 20],
        )
        .expect("Failed to create dataset");

        assert_eq!(ds.properties().layout(), LayoutType::Chunked);
        assert_eq!(ds.properties().chunk_dims(), Some(&[10, 20][..]));
    }

    #[test]
    fn test_dataset_compressed() {
        let ds = Dataset::compressed(
            "data".to_string(),
            "/data".to_string(),
            Datatype::Float64,
            vec![100, 200],
            vec![10, 20],
            CompressionFilter::Gzip { level: 6 },
        )
        .expect("Failed to create dataset");

        assert_eq!(ds.properties().layout(), LayoutType::Chunked);
        assert_eq!(
            ds.properties().compression(),
            CompressionFilter::Gzip { level: 6 }
        );
    }

    #[test]
    fn test_dataset_validate_slice() {
        let ds = Dataset::from_2d(
            "data".to_string(),
            "/data".to_string(),
            Datatype::Int32,
            100,
            200,
        )
        .expect("Failed to create dataset");

        // In-bounds selections, including one spanning the whole dataset.
        assert!(ds.validate_slice(&[0, 0], &[50, 100]).is_ok());
        assert!(ds.validate_slice(&[50, 100], &[50, 100]).is_ok());
        assert!(ds.validate_slice(&[0, 0], &[100, 200]).is_ok());
        // Out-of-bounds by one in each position.
        assert!(ds.validate_slice(&[0, 0], &[101, 200]).is_err());
        assert!(ds.validate_slice(&[50, 100], &[51, 100]).is_err());
    }

    #[test]
    fn test_dataset_slice_size() {
        let ds = Dataset::from_2d(
            "data".to_string(),
            "/data".to_string(),
            Datatype::Int32,
            100,
            200,
        )
        .expect("Failed to create dataset");

        assert_eq!(ds.slice_size(&[50, 100]), 5000);
        // 5_000 Int32 elements * 4 bytes each.
        assert_eq!(ds.slice_size_bytes(&[50, 100]), 20000);
    }

    #[test]
    fn test_dataset_set_data() {
        let mut ds =
            Dataset::from_1d("data".to_string(), "/data".to_string(), Datatype::Int32, 10)
                .expect("Failed to create dataset");

        // Exactly 10 Int32 elements = 40 bytes: accepted.
        let correct = vec![0u8; 40];
        assert!(ds.set_data(correct).is_ok());

        // Any other length is rejected.
        let wrong = vec![0u8; 50];
        assert!(ds.set_data(wrong).is_err());
    }
}
559}