Skip to main content

ad_plugins/
file_hdf5.rs

1use std::path::{Path, PathBuf};
2use std::sync::Arc;
3
4use ad_core::error::{ADError, ADResult};
5use ad_core::ndarray::{NDArray, NDDataBuffer, NDDataType, NDDimension};
6use ad_core::ndarray_pool::NDArrayPool;
7use ad_core::plugin::file_base::{NDFileMode, NDFileWriter, NDPluginFileBase};
8use ad_core::plugin::runtime::{NDPluginProcess, ProcessResult};
9
10// ============================================================
11// Real HDF5 writer (feature-gated)
12// ============================================================
13
#[cfg(feature = "hdf5")]
mod hdf5_real {
    use super::*;
    use hdf5_metno::types::{FloatSize, IntSize, TypeDescriptor, VarLenUnicode};
    use hdf5_metno::{Dataset, File as H5File, H5Type};

    /// Wraps an HDF5 failure in the error variant this module reports,
    /// formatted as `"<context>: <cause>"`.
    fn h5_err(context: &str, cause: impl std::fmt::Display) -> ADError {
        ADError::UnsupportedConversion(format!("{}: {}", context, cause))
    }

    /// Creates dataset `name` with the given `shape` in `file` and fills it
    /// with `data`.
    ///
    /// The dataset handle is returned so the caller can attach attributes
    /// without reopening the dataset by name.
    fn create_dataset<T: H5Type>(
        file: &H5File,
        name: &str,
        shape: &[usize],
        data: &[T],
    ) -> ADResult<Dataset> {
        let ds = file
            .new_dataset::<T>()
            .shape(shape)
            .create(name)
            .map_err(|e| h5_err("HDF5 dataset error", e))?;
        ds.write_raw(data)
            .map_err(|e| h5_err("HDF5 write error", e))?;
        Ok(ds)
    }

    /// Reads every element of `ds` as `T` and wraps the values in an
    /// [`NDArray`] of type `dtype`, using `wrap` to pick the buffer variant.
    fn read_dataset<T: H5Type>(
        ds: &Dataset,
        dims: Vec<NDDimension>,
        dtype: NDDataType,
        wrap: fn(Vec<T>) -> NDDataBuffer,
    ) -> ADResult<NDArray> {
        let values = ds
            .read_raw::<T>()
            .map_err(|e| h5_err("HDF5 read error", e))?;
        let mut arr = NDArray::new(dims, dtype);
        arr.data = wrap(values);
        Ok(arr)
    }

    /// HDF5 file writer backed by the `hdf5_metno` crate.
    ///
    /// Every written frame becomes its own dataset: the first one is named
    /// after `dataset_name`, later ones get a `_<frame index>` suffix.
    pub struct Hdf5RealWriter {
        current_path: Option<PathBuf>,
        file: Option<H5File>,
        frame_count: usize,
        dataset_name: String,
    }

    impl Hdf5RealWriter {
        /// Creates a writer with no open file and the dataset name `"data"`.
        pub fn new() -> Self {
            Self {
                current_path: None,
                file: None,
                frame_count: 0,
                dataset_name: "data".to_string(),
            }
        }

        /// Sets the base name used for datasets written from now on.
        pub fn set_dataset_name(&mut self, name: &str) {
            self.dataset_name = name.to_string();
        }
    }

    impl Default for Hdf5RealWriter {
        fn default() -> Self {
            Self::new()
        }
    }

    impl NDFileWriter for Hdf5RealWriter {
        /// Creates (truncating) the HDF5 file at `path` and resets the frame
        /// counter.
        ///
        /// Writer state is only updated once the file has been created, so a
        /// failed open leaves the previous state untouched.
        fn open_file(&mut self, path: &Path, _mode: NDFileMode, _array: &NDArray) -> ADResult<()> {
            let h5file = H5File::create(path).map_err(|e| h5_err("HDF5 create error", e))?;

            self.current_path = Some(path.to_path_buf());
            self.frame_count = 0;
            self.file = Some(h5file);
            Ok(())
        }

        /// Writes `array` as a new dataset, keeping its element type and shape,
        /// and stores each array attribute as a string attribute on it.
        ///
        /// # Errors
        /// Fails when no file is open or when the dataset cannot be created or
        /// written. Attribute failures are ignored (best effort).
        fn write_file(&mut self, array: &NDArray) -> ADResult<()> {
            let h5file = self
                .file
                .as_ref()
                .ok_or_else(|| ADError::UnsupportedConversion("no HDF5 file open".into()))?;

            let dataset_name = if self.frame_count == 0 {
                self.dataset_name.clone()
            } else {
                format!("{}_{}", self.dataset_name, self.frame_count)
            };
            let name = dataset_name.as_str();

            // The dimension order is reversed for HDF5 — presumably because
            // NDArray lists the fastest-varying dimension first; confirm
            // against ad_core.
            let shape: Vec<usize> = array.dims.iter().rev().map(|d| d.size).collect();

            // Every element type is written natively so neither the shape nor
            // the dtype is lost.
            let ds = match &array.data {
                NDDataBuffer::I8(v) => create_dataset(h5file, name, &shape, v)?,
                NDDataBuffer::U8(v) => create_dataset(h5file, name, &shape, v)?,
                NDDataBuffer::I16(v) => create_dataset(h5file, name, &shape, v)?,
                NDDataBuffer::U16(v) => create_dataset(h5file, name, &shape, v)?,
                NDDataBuffer::I32(v) => create_dataset(h5file, name, &shape, v)?,
                NDDataBuffer::U32(v) => create_dataset(h5file, name, &shape, v)?,
                NDDataBuffer::I64(v) => create_dataset(h5file, name, &shape, v)?,
                NDDataBuffer::U64(v) => create_dataset(h5file, name, &shape, v)?,
                NDDataBuffer::F32(v) => create_dataset(h5file, name, &shape, v)?,
                NDDataBuffer::F64(v) => create_dataset(h5file, name, &shape, v)?,
            };

            // Attributes are best effort: a value that cannot be represented
            // is stored as an empty string and a failed attribute write does
            // not fail the frame.
            for attr in array.attributes.iter() {
                let value: VarLenUnicode = attr.value.as_string().parse().unwrap_or_default();
                let _ = ds
                    .new_attr::<VarLenUnicode>()
                    .shape(())
                    .create(attr.name.as_str())
                    .and_then(|a| a.write_scalar(&value));
            }

            self.frame_count += 1;
            Ok(())
        }

        /// Reopens the current file and reads the dataset named exactly
        /// `dataset_name` (the first frame written).
        ///
        /// The element type is taken from the datatype stored in the file
        /// rather than by probing candidate types: a numeric read into a
        /// different numeric type can succeed through conversion, which would
        /// silently return clamped values of the wrong type.
        ///
        /// # Errors
        /// Fails when no file is open, the file or dataset cannot be opened,
        /// or the stored type is not one of the supported integer/float types.
        fn read_file(&mut self) -> ADResult<NDArray> {
            let path = self
                .current_path
                .as_ref()
                .ok_or_else(|| ADError::UnsupportedConversion("no file open".into()))?;

            let h5file = H5File::open(path).map_err(|e| h5_err("HDF5 open error", e))?;
            let ds = h5file
                .dataset(&self.dataset_name)
                .map_err(|e| h5_err("HDF5 dataset error", e))?;

            let dims: Vec<NDDimension> = ds
                .shape()
                .iter()
                .rev()
                .map(|&s| NDDimension::new(s))
                .collect();

            let descriptor = ds
                .dtype()
                .and_then(|dt| dt.to_descriptor())
                .map_err(|e| h5_err("HDF5 dataset error", e))?;

            match descriptor {
                TypeDescriptor::Integer(IntSize::U1) => {
                    read_dataset::<i8>(&ds, dims, NDDataType::Int8, NDDataBuffer::I8)
                }
                TypeDescriptor::Integer(IntSize::U2) => {
                    read_dataset::<i16>(&ds, dims, NDDataType::Int16, NDDataBuffer::I16)
                }
                TypeDescriptor::Integer(IntSize::U4) => {
                    read_dataset::<i32>(&ds, dims, NDDataType::Int32, NDDataBuffer::I32)
                }
                TypeDescriptor::Integer(IntSize::U8) => {
                    read_dataset::<i64>(&ds, dims, NDDataType::Int64, NDDataBuffer::I64)
                }
                TypeDescriptor::Unsigned(IntSize::U1) => {
                    read_dataset::<u8>(&ds, dims, NDDataType::UInt8, NDDataBuffer::U8)
                }
                TypeDescriptor::Unsigned(IntSize::U2) => {
                    read_dataset::<u16>(&ds, dims, NDDataType::UInt16, NDDataBuffer::U16)
                }
                TypeDescriptor::Unsigned(IntSize::U4) => {
                    read_dataset::<u32>(&ds, dims, NDDataType::UInt32, NDDataBuffer::U32)
                }
                TypeDescriptor::Unsigned(IntSize::U8) => {
                    read_dataset::<u64>(&ds, dims, NDDataType::UInt64, NDDataBuffer::U64)
                }
                TypeDescriptor::Float(FloatSize::U4) => {
                    read_dataset::<f32>(&ds, dims, NDDataType::Float32, NDDataBuffer::F32)
                }
                TypeDescriptor::Float(FloatSize::U8) => {
                    read_dataset::<f64>(&ds, dims, NDDataType::Float64, NDDataBuffer::F64)
                }
                _ => Err(ADError::UnsupportedConversion(
                    "unsupported HDF5 data type".into(),
                )),
            }
        }

        /// Drops the file handle and forgets the current path.
        fn close_file(&mut self) -> ADResult<()> {
            self.file = None;
            self.current_path = None;
            Ok(())
        }

        /// Several arrays can be written to one file (one dataset per frame).
        fn supports_multiple_arrays(&self) -> bool {
            true
        }
    }
}
182
183// ============================================================
184// Binary format writer (fallback when hdf5 feature is not enabled)
185// ============================================================
186
/// Fallback frame writer that does not depend on the HDF5 library.
///
/// Despite the name, the output is **not** a valid HDF5 file: it begins with
/// the 8-byte HDF5 signature and is followed by a simple custom frame stream
/// (dimensions, data type, raw data, attributes). Use `Hdf5RealWriter`
/// (available with the `hdf5` feature) for real HDF5 I/O.
#[derive(Debug, Default)]
pub struct Hdf5Writer {
    /// Path given to the most recent successful open, if any.
    current_path: Option<PathBuf>,
    /// Number of frames written since the file was opened.
    frame_count: usize,
    /// Handle of the file currently being written, if any.
    file: Option<std::fs::File>,
}

impl Hdf5Writer {
    /// Creates a writer with no file open and a frame count of zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the number of frames written since the file was opened.
    pub fn frame_count(&self) -> usize {
        self.frame_count
    }
}
215
216impl NDFileWriter for Hdf5Writer {
217    fn open_file(&mut self, path: &Path, _mode: NDFileMode, _array: &NDArray) -> ADResult<()> {
218        use std::io::Write;
219
220        self.current_path = Some(path.to_path_buf());
221        self.frame_count = 0;
222
223        let mut file = std::fs::File::create(path)?;
224        // Write a simple header (placeholder for HDF5 superblock)
225        file.write_all(b"\x89HDF\r\n\x1a\n")?; // HDF5 magic
226        self.file = Some(file);
227        Ok(())
228    }
229
230    fn write_file(&mut self, array: &NDArray) -> ADResult<()> {
231        use std::io::Write;
232
233        let file = self.file.as_mut()
234            .ok_or_else(|| ADError::UnsupportedConversion("no file open".into()))?;
235
236        let info = array.info();
237
238        // Write frame header: ndims, dims, dtype, data_size
239        let ndims = array.dims.len() as u32;
240        file.write_all(&ndims.to_le_bytes())?;
241        for dim in &array.dims {
242            file.write_all(&(dim.size as u32).to_le_bytes())?;
243        }
244        file.write_all(&(array.data.data_type() as u8).to_le_bytes())?;
245        let data_size = info.total_bytes as u32;
246        file.write_all(&data_size.to_le_bytes())?;
247
248        // Write raw data
249        file.write_all(array.data.as_u8_slice())?;
250
251        // Write attributes
252        let num_attrs = array.attributes.len() as u32;
253        file.write_all(&num_attrs.to_le_bytes())?;
254        for attr in array.attributes.iter() {
255            let name_bytes = attr.name.as_bytes();
256            file.write_all(&(name_bytes.len() as u16).to_le_bytes())?;
257            file.write_all(name_bytes)?;
258            let val_str = attr.value.as_string();
259            let val_bytes = val_str.as_bytes();
260            file.write_all(&(val_bytes.len() as u16).to_le_bytes())?;
261            file.write_all(val_bytes)?;
262        }
263
264        self.frame_count += 1;
265        Ok(())
266    }
267
268    fn read_file(&mut self) -> ADResult<NDArray> {
269        let path = self.current_path.as_ref()
270            .ok_or_else(|| ADError::UnsupportedConversion("no file open".into()))?;
271
272        let data = std::fs::read(path)?;
273        if data.len() < 8 || &data[0..8] != b"\x89HDF\r\n\x1a\n" {
274            return Err(ADError::UnsupportedConversion("not an HDF5 file".into()));
275        }
276
277        // Read first frame
278        let mut pos = 8;
279        if pos + 4 > data.len() {
280            return Err(ADError::UnsupportedConversion("truncated file".into()));
281        }
282        let ndims = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()) as usize;
283        pos += 4;
284
285        let mut dims = Vec::with_capacity(ndims);
286        for _ in 0..ndims {
287            if pos + 4 > data.len() {
288                return Err(ADError::UnsupportedConversion("truncated file".into()));
289            }
290            let size = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()) as usize;
291            dims.push(NDDimension::new(size));
292            pos += 4;
293        }
294
295        if pos + 5 > data.len() {
296            return Err(ADError::UnsupportedConversion("truncated file".into()));
297        }
298        let dtype_ord = data[pos];
299        pos += 1;
300        let data_size = u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()) as usize;
301        pos += 4;
302
303        let dtype = NDDataType::from_ordinal(dtype_ord)
304            .ok_or_else(|| ADError::UnsupportedConversion("invalid data type".into()))?;
305
306        if pos + data_size > data.len() {
307            return Err(ADError::UnsupportedConversion("truncated file".into()));
308        }
309        let raw = &data[pos..pos + data_size];
310
311        let buf = reconstruct_buffer(dtype, raw);
312
313        let mut arr = NDArray::new(dims, dtype);
314        arr.data = buf;
315        Ok(arr)
316    }
317
318    fn close_file(&mut self) -> ADResult<()> {
319        self.file = None;
320        self.current_path = None;
321        Ok(())
322    }
323
324    fn supports_multiple_arrays(&self) -> bool {
325        true
326    }
327}
328
329/// Reconstruct a typed NDDataBuffer from raw bytes.
330fn reconstruct_buffer(dtype: NDDataType, raw: &[u8]) -> NDDataBuffer {
331    match dtype {
332        NDDataType::Int8 => {
333            NDDataBuffer::I8(raw.iter().map(|&b| b as i8).collect())
334        }
335        NDDataType::UInt8 => {
336            NDDataBuffer::U8(raw.to_vec())
337        }
338        NDDataType::Int16 => {
339            let v: Vec<i16> = raw.chunks_exact(2)
340                .map(|c| i16::from_ne_bytes([c[0], c[1]]))
341                .collect();
342            NDDataBuffer::I16(v)
343        }
344        NDDataType::UInt16 => {
345            let v: Vec<u16> = raw.chunks_exact(2)
346                .map(|c| u16::from_ne_bytes([c[0], c[1]]))
347                .collect();
348            NDDataBuffer::U16(v)
349        }
350        NDDataType::Int32 => {
351            let v: Vec<i32> = raw.chunks_exact(4)
352                .map(|c| i32::from_ne_bytes([c[0], c[1], c[2], c[3]]))
353                .collect();
354            NDDataBuffer::I32(v)
355        }
356        NDDataType::UInt32 => {
357            let v: Vec<u32> = raw.chunks_exact(4)
358                .map(|c| u32::from_ne_bytes([c[0], c[1], c[2], c[3]]))
359                .collect();
360            NDDataBuffer::U32(v)
361        }
362        NDDataType::Int64 => {
363            let v: Vec<i64> = raw.chunks_exact(8)
364                .map(|c| i64::from_ne_bytes(c.try_into().unwrap()))
365                .collect();
366            NDDataBuffer::I64(v)
367        }
368        NDDataType::UInt64 => {
369            let v: Vec<u64> = raw.chunks_exact(8)
370                .map(|c| u64::from_ne_bytes(c.try_into().unwrap()))
371                .collect();
372            NDDataBuffer::U64(v)
373        }
374        NDDataType::Float32 => {
375            let v: Vec<f32> = raw.chunks_exact(4)
376                .map(|c| f32::from_ne_bytes([c[0], c[1], c[2], c[3]]))
377                .collect();
378            NDDataBuffer::F32(v)
379        }
380        NDDataType::Float64 => {
381            let v: Vec<f64> = raw.chunks_exact(8)
382                .map(|c| f64::from_ne_bytes(c.try_into().unwrap()))
383                .collect();
384            NDDataBuffer::F64(v)
385        }
386    }
387}
388
389// ============================================================
390// Processor (wraps either real HDF5 or binary writer)
391// ============================================================
392
393/// HDF5 file processor wrapping NDPluginFileBase + Hdf5Writer.
394/// When the `hdf5` feature is not enabled, uses a binary format fallback.
395pub struct Hdf5FileProcessor {
396    file_base: NDPluginFileBase,
397    writer: Hdf5Writer,
398}
399
400impl Hdf5FileProcessor {
401    pub fn new() -> Self {
402        Self {
403            file_base: NDPluginFileBase::new(),
404            writer: Hdf5Writer::new(),
405        }
406    }
407
408    pub fn file_base_mut(&mut self) -> &mut NDPluginFileBase {
409        &mut self.file_base
410    }
411}
412
413impl Default for Hdf5FileProcessor {
414    fn default() -> Self {
415        Self::new()
416    }
417}
418
419impl NDPluginProcess for Hdf5FileProcessor {
420    fn process_array(&mut self, array: &NDArray, _pool: &NDArrayPool) -> ProcessResult {
421        let _ = self
422            .file_base
423            .process_array(Arc::new(array.clone()), &mut self.writer);
424        ProcessResult::empty() // file plugins are sinks
425    }
426
427    fn plugin_type(&self) -> &str {
428        "NDFileHDF5"
429    }
430}
431
432/// Re-export the real HDF5 writer when the feature is enabled.
433#[cfg(feature = "hdf5")]
434pub use hdf5_real::Hdf5RealWriter;
435
/// File-plugin processor that pairs an [`NDPluginFileBase`] with an
/// [`Hdf5RealWriter`]. Only available with the `hdf5` feature.
#[cfg(feature = "hdf5")]
pub struct Hdf5RealFileProcessor {
    file_base: NDPluginFileBase,
    writer: Hdf5RealWriter,
}

#[cfg(feature = "hdf5")]
impl Hdf5RealFileProcessor {
    /// Creates a processor with a fresh file base and a fresh writer.
    pub fn new() -> Self {
        let file_base = NDPluginFileBase::new();
        let writer = Hdf5RealWriter::new();
        Self { file_base, writer }
    }

    /// Gives mutable access to the underlying file base.
    pub fn file_base_mut(&mut self) -> &mut NDPluginFileBase {
        &mut self.file_base
    }

    /// Forwards `name` to the writer as the dataset name to use.
    pub fn set_dataset_name(&mut self, name: &str) {
        let writer = &mut self.writer;
        writer.set_dataset_name(name);
    }
}

#[cfg(feature = "hdf5")]
impl Default for Hdf5RealFileProcessor {
    /// Equivalent to [`Hdf5RealFileProcessor::new`].
    fn default() -> Self {
        Hdf5RealFileProcessor::new()
    }
}
467
#[cfg(feature = "hdf5")]
impl NDPluginProcess for Hdf5RealFileProcessor {
    /// Hands a copy of `array` to the file base together with the writer.
    ///
    /// Whatever the file base returns is discarded and the result is always
    /// empty.
    fn process_array(&mut self, array: &NDArray, _pool: &NDArrayPool) -> ProcessResult {
        // The file base takes shared ownership, so the borrowed array is cloned.
        let shared = Arc::new(array.clone());
        let _ = self.file_base.process_array(shared, &mut self.writer);
        ProcessResult::empty()
    }

    /// Name under which this plugin identifies itself.
    fn plugin_type(&self) -> &str {
        "NDFileHDF5"
    }
}
481
#[cfg(test)]
mod tests {
    use super::*;
    use ad_core::attributes::{NDAttribute, NDAttrSource, NDAttrValue};
    use std::sync::atomic::{AtomicU32, Ordering};

    static TEST_COUNTER: AtomicU32 = AtomicU32::new(0);

    /// Unique temporary file path that is removed when the guard is dropped,
    /// including when a failing assertion unwinds the test.
    struct TempFile(PathBuf);

    impl TempFile {
        /// The process id keeps concurrently running test binaries from
        /// colliding; the counter separates tests within one process.
        fn new(prefix: &str) -> Self {
            let n = TEST_COUNTER.fetch_add(1, Ordering::Relaxed);
            let name = format!("adcore_test_{}_{}_{}.h5", prefix, std::process::id(), n);
            Self(std::env::temp_dir().join(name))
        }

        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            let _ = std::fs::remove_file(&self.0);
        }
    }

    /// Opens `path`, writes `arr` `count` times, closes the file and returns
    /// the writer so its counters can be inspected.
    fn write_frames(path: &Path, mode: NDFileMode, arr: &NDArray, count: usize) -> Hdf5Writer {
        let mut writer = Hdf5Writer::new();
        writer.open_file(path, mode, arr).unwrap();
        for _ in 0..count {
            writer.write_file(arr).unwrap();
        }
        writer.close_file().unwrap();
        writer
    }

    /// Reads the first frame of `path` back with a fresh writer instance.
    fn read_first_frame(path: &Path) -> NDArray {
        let mut reader = Hdf5Writer::new();
        reader.current_path = Some(path.to_path_buf());
        reader.read_file().unwrap()
    }

    #[test]
    fn test_write_single_frame() {
        let tmp = TempFile::new("hdf5_single");

        let mut arr = NDArray::new(
            vec![NDDimension::new(4), NDDimension::new(4)],
            NDDataType::UInt8,
        );
        let expected: Vec<u8> = (0..16).collect();
        if let NDDataBuffer::U8(ref mut v) = arr.data {
            v.copy_from_slice(&expected);
        }

        write_frames(tmp.path(), NDFileMode::Single, &arr, 1);

        let read_arr = read_first_frame(tmp.path());
        assert_eq!(read_arr.dims.len(), 2);
        assert_eq!(read_arr.dims[0].size, 4);
        assert_eq!(read_arr.dims[1].size, 4);
        assert_eq!(read_arr.data.data_type(), NDDataType::UInt8);
        if let NDDataBuffer::U8(ref v) = read_arr.data {
            assert_eq!(v, &expected);
        } else {
            panic!("expected U8 data");
        }
    }

    #[test]
    fn test_write_multiple_frames() {
        let tmp = TempFile::new("hdf5_multi");

        let arr = NDArray::new(
            vec![NDDimension::new(4), NDDimension::new(4)],
            NDDataType::UInt8,
        );

        let writer = write_frames(tmp.path(), NDFileMode::Stream, &arr, 3);

        assert!(writer.supports_multiple_arrays());
        assert_eq!(writer.frame_count(), 3);

        let meta = std::fs::metadata(tmp.path()).unwrap();
        assert!(meta.len() > 16 * 3); // 3 frames of data
    }

    #[test]
    fn test_attributes_stored() {
        let tmp = TempFile::new("hdf5_attrs");

        let mut arr = NDArray::new(vec![NDDimension::new(4)], NDDataType::UInt8);
        arr.attributes.add(NDAttribute {
            name: "exposure".into(),
            description: "".into(),
            source: NDAttrSource::Driver,
            value: NDAttrValue::Float64(0.5),
        });

        write_frames(tmp.path(), NDFileMode::Single, &arr, 1);

        // File should contain "exposure" and "0.5"
        let data = std::fs::read(tmp.path()).unwrap();
        let content = String::from_utf8_lossy(&data);
        assert!(content.contains("exposure"));
        assert!(content.contains("0.5"));
    }

    #[test]
    fn test_roundtrip_u16() {
        let tmp = TempFile::new("hdf5_u16");

        let mut arr = NDArray::new(
            vec![NDDimension::new(4), NDDimension::new(4)],
            NDDataType::UInt16,
        );
        if let NDDataBuffer::U16(ref mut v) = arr.data {
            for (i, slot) in v.iter_mut().take(16).enumerate() {
                *slot = (i * 100) as u16;
            }
        }

        write_frames(tmp.path(), NDFileMode::Single, &arr, 1);

        let read_arr = read_first_frame(tmp.path());
        assert_eq!(read_arr.data.data_type(), NDDataType::UInt16);
        if let NDDataBuffer::U16(ref v) = read_arr.data {
            assert_eq!(v[0], 0);
            assert_eq!(v[1], 100);
            assert_eq!(v[15], 1500);
        } else {
            panic!("expected U16 data");
        }
    }

    #[test]
    fn test_roundtrip_f64() {
        let tmp = TempFile::new("hdf5_f64");

        let mut arr = NDArray::new(vec![NDDimension::new(4)], NDDataType::Float64);
        if let NDDataBuffer::F64(ref mut v) = arr.data {
            v[0] = 1.5;
            v[1] = 2.5;
            v[2] = 3.5;
            v[3] = 4.5;
        }

        write_frames(tmp.path(), NDFileMode::Single, &arr, 1);

        let read_arr = read_first_frame(tmp.path());
        assert_eq!(read_arr.data.data_type(), NDDataType::Float64);
        if let NDDataBuffer::F64(ref v) = read_arr.data {
            assert!((v[0] - 1.5).abs() < 1e-10);
            assert!((v[3] - 4.5).abs() < 1e-10);
        } else {
            panic!("expected F64 data");
        }
    }
}