Skip to main content

rust_hdf5/format/messages/
fill_value.rs

1//! Fill value message (type 0x05) — specifies the default fill value for
2//! unwritten elements.
3//!
4//! Three on-disk versions exist for this message and a conforming reader
5//! must accept all of them (`H5O__fill_new_decode`, H5Ofill.c):
6//!
7//! Versions 1 & 2:
8//!   Byte 0: version (1 or 2)
9//!   Byte 1: space allocation time (1=early, 2=late, 3=incremental)
10//!   Byte 2: fill value write time (0=on_alloc, 1=never, 2=if_set)
11//!   Byte 3: fill value defined (0=undefined, non-zero=defined)
12//!   [if defined != 0]: u32 LE size + `size` bytes of fill data
13//!
14//! Version 3 (the version this crate writes):
15//!   Byte 0: version = 3
16//!   Byte 1: flags — bits 0-1 allocation time, bits 2-3 fill write time,
17//!           0x10 = fill value undefined, 0x20 = explicit fill value present
18//!   [if 0x20 set]: u32 LE size + `size` bytes of fill data
19//!
20//! libhdf5 2.0.0 still writes version 2 for this message, so decoding must
21//! handle every version even though encoding always emits version 3.
22
23use crate::format::{FormatError, FormatResult};
24
/// Message version emitted by the encoder.
const VERSION: u8 = 3;

// Version-3 flags byte layout (H5Ofill.c).

/// Bits 0-1 of the flags byte: space allocation time.
const FLAG_MASK_ALLOC: u8 = 0b0000_0011;
/// Two-bit mask for the fill write time, applied once the field has been
/// shifted down by [`FLAG_SHIFT_FILL`].
const FLAG_MASK_FILL: u8 = 0b0000_0011;
/// Bit offset of the fill-write-time field inside the flags byte.
const FLAG_SHIFT_FILL: u8 = 2;
/// Bit 4: the fill value is explicitly undefined.
const FLAG_UNDEFINED: u8 = 1 << 4;
/// Bit 5: a size-prefixed fill value follows the flags byte.
const FLAG_HAVE_VALUE: u8 = 1 << 5;
/// Union of every bit the version-3 layout assigns a meaning to.
const FLAGS_ALL: u8 =
    FLAG_MASK_ALLOC | (FLAG_MASK_FILL << FLAG_SHIFT_FILL) | FLAG_UNDEFINED | FLAG_HAVE_VALUE;
35
/// Fill value message payload.
///
/// The numeric fields hold the raw codes used by the on-disk message; see
/// the per-field documentation for the meaning of each code.
///
/// `Eq` is derived alongside `PartialEq` because every field has total
/// equality, which lets the type be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FillValueMessage {
    /// Space allocation time: 1=early, 2=late, 3=incremental.
    pub alloc_time: u8,
    /// Fill value write time: 0=on alloc, 1=never, 2=if set.
    pub fill_write_time: u8,
    /// Fill value defined: 0=undefined, 1=default (zeros), 2=user-defined.
    pub fill_defined: u8,
    /// User-defined fill value data.  Present only when `fill_defined == 2`.
    pub fill_value: Option<Vec<u8>>,
}
48
49impl Default for FillValueMessage {
50    fn default() -> Self {
51        Self {
52            alloc_time: 2,      // late
53            fill_write_time: 0, // on alloc
54            fill_defined: 1,    // default value (zeros)
55            fill_value: None,
56        }
57    }
58}
59
60impl FillValueMessage {
61    /// A user-defined fill value.
62    pub fn with_value(data: Vec<u8>) -> Self {
63        Self {
64            alloc_time: 2,
65            fill_write_time: 0,
66            fill_defined: 2,
67            fill_value: Some(data),
68        }
69    }
70
71    /// An undefined fill value (no fill is performed).
72    pub fn undefined() -> Self {
73        Self {
74            alloc_time: 2,
75            fill_write_time: 1, // never
76            fill_defined: 0,
77            fill_value: None,
78        }
79    }
80
81    // ------------------------------------------------------------------ encode
82
83    /// Encode as a version-3 fill-value message (`H5O__fill_new_encode`).
84    pub fn encode(&self) -> Vec<u8> {
85        let mut buf = Vec::with_capacity(10);
86        buf.push(VERSION);
87
88        let flags = (self.alloc_time & FLAG_MASK_ALLOC)
89            | ((self.fill_write_time & FLAG_MASK_FILL) << FLAG_SHIFT_FILL);
90
91        if self.fill_defined == 0 {
92            // Explicitly undefined: no value follows.
93            buf.push(flags | FLAG_UNDEFINED);
94        } else if let Some(data) = self.fill_value.as_ref().filter(|d| !d.is_empty()) {
95            // Explicit value present.
96            buf.push(flags | FLAG_HAVE_VALUE);
97            buf.extend_from_slice(&(data.len() as u32).to_le_bytes());
98            buf.extend_from_slice(data);
99        } else {
100            // Defined, but no explicit value (default zero fill).
101            buf.push(flags);
102        }
103
104        buf
105    }
106
107    // ------------------------------------------------------------------ decode
108
109    /// Decode a fill-value message of version 1, 2, or 3.
110    pub fn decode(buf: &[u8]) -> FormatResult<(Self, usize)> {
111        if buf.is_empty() {
112            return Err(FormatError::BufferTooShort {
113                needed: 1,
114                available: 0,
115            });
116        }
117        match buf[0] {
118            1 | 2 => Self::decode_v1v2(buf),
119            3 => Self::decode_v3(buf),
120            other => Err(FormatError::InvalidVersion(other)),
121        }
122    }
123
124    /// Decode the version-1/2 layout (separate alloc/fill-time/defined bytes).
125    fn decode_v1v2(buf: &[u8]) -> FormatResult<(Self, usize)> {
126        if buf.len() < 4 {
127            return Err(FormatError::BufferTooShort {
128                needed: 4,
129                available: buf.len(),
130            });
131        }
132        let alloc_time = buf[1];
133        let fill_write_time = buf[2];
134        let defined_byte = buf[3];
135
136        let mut pos = 4;
137        let mut fill_value = None;
138        if defined_byte != 0 {
139            if buf.len() < pos + 4 {
140                return Err(FormatError::BufferTooShort {
141                    needed: pos + 4,
142                    available: buf.len(),
143                });
144            }
145            let size =
146                u32::from_le_bytes([buf[pos], buf[pos + 1], buf[pos + 2], buf[pos + 3]]) as usize;
147            pos += 4;
148            if size > 0 {
149                if buf.len() < pos + size {
150                    return Err(FormatError::BufferTooShort {
151                        needed: pos + size,
152                        available: buf.len(),
153                    });
154                }
155                fill_value = Some(buf[pos..pos + size].to_vec());
156                pos += size;
157            }
158        }
159
160        // Normalize `fill_defined` onto this crate's tri-state: an explicit
161        // non-empty value is "user-defined", any other defined message is
162        // "default zero fill", an undefined message is "undefined".
163        let fill_defined = if fill_value.is_some() {
164            2
165        } else if defined_byte != 0 {
166            1
167        } else {
168            0
169        };
170
171        Ok((
172            Self {
173                alloc_time,
174                fill_write_time,
175                fill_defined,
176                fill_value,
177            },
178            pos,
179        ))
180    }
181
182    /// Decode the version-3 layout (packed flags byte).
183    fn decode_v3(buf: &[u8]) -> FormatResult<(Self, usize)> {
184        if buf.len() < 2 {
185            return Err(FormatError::BufferTooShort {
186                needed: 2,
187                available: buf.len(),
188            });
189        }
190        let flags = buf[1];
191        if flags & !FLAGS_ALL != 0 {
192            return Err(FormatError::InvalidData(format!(
193                "unknown flags 0x{flags:02x} in version-3 fill-value message"
194            )));
195        }
196        let alloc_time = flags & FLAG_MASK_ALLOC;
197        let fill_write_time = (flags >> FLAG_SHIFT_FILL) & FLAG_MASK_FILL;
198
199        let mut pos = 2;
200        let (fill_defined, fill_value) = if flags & FLAG_UNDEFINED != 0 {
201            if flags & FLAG_HAVE_VALUE != 0 {
202                return Err(FormatError::InvalidData(
203                    "fill-value message sets both the undefined and have-value flags".into(),
204                ));
205            }
206            (0, None)
207        } else if flags & FLAG_HAVE_VALUE != 0 {
208            if buf.len() < pos + 4 {
209                return Err(FormatError::BufferTooShort {
210                    needed: pos + 4,
211                    available: buf.len(),
212                });
213            }
214            let size =
215                u32::from_le_bytes([buf[pos], buf[pos + 1], buf[pos + 2], buf[pos + 3]]) as usize;
216            pos += 4;
217            if buf.len() < pos + size {
218                return Err(FormatError::BufferTooShort {
219                    needed: pos + size,
220                    available: buf.len(),
221                });
222            }
223            let data = buf[pos..pos + size].to_vec();
224            pos += size;
225            (2, Some(data))
226        } else {
227            (1, None)
228        };
229
230        Ok((
231            Self {
232                alloc_time,
233                fill_write_time,
234                fill_defined,
235                fill_value,
236            },
237            pos,
238        ))
239    }
240}
241
242// ================================================================ tiling helper
243
/// Produce a buffer of exactly `total` bytes filled with repetitions of
/// `fill_value`, or with zeros when `fill_value` is `None` or empty.
///
/// The pattern is repeated back to back from offset 0; when `total` is not
/// a multiple of the pattern length, the final repetition is cut short.
///
/// This is the single source of truth for materializing fill values:
/// chunked reads use it to initialize output buffers, and the writer uses
/// it to pad partial chunks so that unwritten elements read back as the
/// fill value rather than zero.
pub(crate) fn tiled_fill(total: usize, fill_value: Option<&[u8]>) -> Vec<u8> {
    let pattern = match fill_value {
        Some(bytes) if !bytes.is_empty() => bytes,
        // No usable pattern: the result is simply zero-filled.
        _ => return vec![0u8; total],
    };

    let mut out = Vec::with_capacity(total);
    while out.len() < total {
        // Full pattern, or only the leading part for a short final tile.
        let take = pattern.len().min(total - out.len());
        out.extend_from_slice(&pattern[..take]);
    }
    out
}
264
/// Fallible counterpart of [`tiled_fill`], intended for reader paths.
///
/// On a read path `total` is computed from untrusted file fields (dataspace
/// dimensions, element size), so a crafted file can request an absurd
/// size. Allocating that with `vec![0u8; total]` aborts the process when
/// the allocation fails; here the memory is requested through
/// `try_reserve_exact`, so the failure comes back as a `TryReserveError`
/// the caller can turn into a clean error.
///
/// On success the result is `total` bytes long: zeros when `fill_value` is
/// `None` or empty, otherwise `fill_value` repeated from offset 0 with a
/// truncated final repetition if needed.
pub(crate) fn try_tiled_fill(
    total: usize,
    fill_value: Option<&[u8]>,
) -> Result<Vec<u8>, std::collections::TryReserveError> {
    let mut out: Vec<u8> = Vec::new();
    out.try_reserve_exact(total)?;
    out.resize(total, 0);

    let pattern = match fill_value {
        Some(bytes) if !bytes.is_empty() => bytes,
        // Nothing to tile: the zeroed buffer is the answer.
        _ => return Ok(out),
    };

    // Whole tiles first, then whatever partial tile is left at the end.
    let mut tiles = out.chunks_exact_mut(pattern.len());
    for tile in &mut tiles {
        tile.copy_from_slice(pattern);
    }
    let tail = tiles.into_remainder();
    tail.copy_from_slice(&pattern[..tail.len()]);

    Ok(out)
}
289
290// ======================================================================= tests
291
#[cfg(test)]
mod tests {
    //! Unit tests for the fill-value message codec and the tiling helpers.

    use super::*;

    #[test]
    fn roundtrip_default() {
        let msg = FillValueMessage::default();
        let encoded = msg.encode();
        // Version 3: version byte + flags byte, no value.
        assert_eq!(encoded.len(), 2);
        let (decoded, consumed) = FillValueMessage::decode(&encoded).unwrap();
        assert_eq!(consumed, 2);
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_user_defined() {
        let msg = FillValueMessage::with_value(vec![0xDE, 0xAD, 0xBE, 0xEF]);
        let encoded = msg.encode();
        // version + flags + u32 size + 4 data = 10
        assert_eq!(encoded.len(), 10);
        let (decoded, consumed) = FillValueMessage::decode(&encoded).unwrap();
        assert_eq!(consumed, 10);
        assert_eq!(decoded, msg);
        assert_eq!(
            decoded.fill_value.as_ref().unwrap(),
            &vec![0xDE, 0xAD, 0xBE, 0xEF]
        );
    }

    #[test]
    fn roundtrip_undefined() {
        let msg = FillValueMessage::undefined();
        let encoded = msg.encode();
        assert_eq!(encoded.len(), 2);
        let (decoded, consumed) = FillValueMessage::decode(&encoded).unwrap();
        assert_eq!(consumed, 2);
        assert_eq!(decoded, msg);
    }

    #[test]
    fn version_3_flags_byte_layout() {
        // alloc_time=3 (bits 0-1), fill_write_time=2 (bits 2-3),
        // explicit value present -> 0x20.
        let msg = FillValueMessage {
            alloc_time: 3,
            fill_write_time: 2,
            fill_defined: 2,
            fill_value: Some(vec![0x01, 0x02]),
        };
        let encoded = msg.encode();
        assert_eq!(encoded[0], 3);
        assert_eq!(encoded[1], 0x03 | (0x02 << 2) | FLAG_HAVE_VALUE);
        assert_eq!(&encoded[2..6], &2u32.to_le_bytes());
        assert_eq!(&encoded[6..8], &[0x01, 0x02]);
        assert_eq!(encoded.len(), 8);
    }

    #[test]
    fn version_3_undefined_flag() {
        let encoded = FillValueMessage::undefined().encode();
        assert_eq!(encoded[1] & FLAG_UNDEFINED, FLAG_UNDEFINED);
        assert_eq!(encoded[1] & FLAG_HAVE_VALUE, 0);
    }

    #[test]
    fn empty_user_data_normalizes_to_default() {
        // An empty explicit value is indistinguishable from default fill in
        // the on-disk format, so it round-trips as `fill_defined == 1`.
        let msg = FillValueMessage {
            alloc_time: 1,
            fill_write_time: 2,
            fill_defined: 2,
            fill_value: Some(vec![]),
        };
        let encoded = msg.encode();
        assert_eq!(encoded.len(), 2);
        let (decoded, consumed) = FillValueMessage::decode(&encoded).unwrap();
        assert_eq!(consumed, 2);
        assert_eq!(decoded.alloc_time, 1);
        assert_eq!(decoded.fill_write_time, 2);
        assert_eq!(decoded.fill_defined, 1);
        assert_eq!(decoded.fill_value, None);
    }

    #[test]
    fn decode_version_1_message() {
        // Version-1 message with an explicit 4-byte value.
        let buf = [1u8, 2, 0, 1, 4, 0, 0, 0, 0xAA, 0xBB, 0xCC, 0xDD];
        let (decoded, consumed) = FillValueMessage::decode(&buf).unwrap();
        assert_eq!(consumed, 12);
        assert_eq!(decoded.alloc_time, 2);
        assert_eq!(decoded.fill_defined, 2);
        assert_eq!(
            decoded.fill_value.as_ref().unwrap(),
            &vec![0xAA, 0xBB, 0xCC, 0xDD]
        );
    }

    #[test]
    fn decode_version_2_message_libhdf5_default() {
        // The body libhdf5 2.0.0 actually writes: version 2, alloc=2,
        // fill_time=2, defined=1, size=4.
        let buf = [2u8, 2, 2, 1, 4, 0, 0, 0, 0x00, 0x00, 0x80, 0xBF];
        let (decoded, consumed) = FillValueMessage::decode(&buf).unwrap();
        assert_eq!(consumed, 12);
        assert_eq!(decoded.alloc_time, 2);
        assert_eq!(decoded.fill_write_time, 2);
        assert_eq!(decoded.fill_defined, 2);
        assert_eq!(
            decoded.fill_value.as_ref().unwrap(),
            &vec![0x00, 0x00, 0x80, 0xBF]
        );
    }

    #[test]
    fn decode_version_2_defined_without_value() {
        // Defined (byte != 0) but size 0: no explicit value.
        let buf = [2u8, 2, 0, 1, 0, 0, 0, 0];
        let (decoded, consumed) = FillValueMessage::decode(&buf).unwrap();
        assert_eq!(consumed, 8);
        assert_eq!(decoded.fill_defined, 1);
        assert_eq!(decoded.fill_value, None);
    }

    #[test]
    fn decode_version_2_undefined() {
        // Defined byte is 0: the message ends after the four header bytes.
        let buf = [2u8, 2, 1, 0];
        let (decoded, consumed) = FillValueMessage::decode(&buf).unwrap();
        assert_eq!(consumed, 4);
        assert_eq!(decoded.alloc_time, 2);
        assert_eq!(decoded.fill_write_time, 1);
        assert_eq!(decoded.fill_defined, 0);
        assert_eq!(decoded.fill_value, None);
    }

    #[test]
    fn decode_version_2_truncated_size() {
        // Defined, but only two of the four size bytes are present.
        let buf = [2u8, 2, 0, 1, 4, 0];
        match FillValueMessage::decode(&buf).unwrap_err() {
            FormatError::BufferTooShort {
                needed: 8,
                available: 6,
            } => {}
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn decode_version_2_truncated_data() {
        // Defined, size=4, but only 1 byte of data.
        let buf = [2u8, 2, 0, 1, 4, 0, 0, 0, 0xAA];
        match FillValueMessage::decode(&buf).unwrap_err() {
            FormatError::BufferTooShort {
                needed: 12,
                available: 9,
            } => {}
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn decode_bad_version() {
        for bad in [0u8, 4, 9] {
            let buf = [bad, 0, 0, 0];
            match FillValueMessage::decode(&buf).unwrap_err() {
                FormatError::InvalidVersion(v) if v == bad => {}
                other => panic!("unexpected error for version {bad}: {other:?}"),
            }
        }
    }

    #[test]
    fn decode_empty_buffer() {
        // Not even a version byte.
        match FillValueMessage::decode(&[]).unwrap_err() {
            FormatError::BufferTooShort {
                needed: 1,
                available: 0,
            } => {}
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn decode_buffer_too_short() {
        // Only the version byte — a v3 message needs the flags byte too.
        let buf = [3u8];
        match FillValueMessage::decode(&buf).unwrap_err() {
            FormatError::BufferTooShort { .. } => {}
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn decode_v3_unknown_flag_rejected() {
        // Bit 0x40 is not a defined flag.
        let buf = [3u8, 0x40];
        match FillValueMessage::decode(&buf).unwrap_err() {
            FormatError::InvalidData(_) => {}
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn decode_v3_conflicting_flags_rejected() {
        // "Undefined" and "have value" are mutually exclusive.
        let buf = [3u8, FLAG_UNDEFINED | FLAG_HAVE_VALUE];
        match FillValueMessage::decode(&buf).unwrap_err() {
            FormatError::InvalidData(_) => {}
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn decode_v3_truncated_size() {
        // HAVE_VALUE flag set but the u32 size field is missing.
        let buf = [3u8, FLAG_HAVE_VALUE, 0xFF];
        match FillValueMessage::decode(&buf).unwrap_err() {
            FormatError::BufferTooShort { .. } => {}
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn decode_v3_truncated_data() {
        // HAVE_VALUE, size=4, but only 2 bytes of data.
        let buf = [3u8, FLAG_HAVE_VALUE, 4, 0, 0, 0, 0xAA, 0xBB];
        match FillValueMessage::decode(&buf).unwrap_err() {
            FormatError::BufferTooShort {
                needed: 10,
                available: 8,
            } => {}
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn version_byte() {
        let encoded = FillValueMessage::default().encode();
        assert_eq!(encoded[0], 3);
    }

    #[test]
    fn tiled_fill_repeats_pattern() {
        assert_eq!(tiled_fill(0, Some(&[1, 2])), Vec::<u8>::new());
        assert_eq!(tiled_fill(6, None), vec![0u8; 6]);
        assert_eq!(tiled_fill(6, Some(&[])), vec![0u8; 6]);
        assert_eq!(
            tiled_fill(6, Some(&[0xAB, 0xCD])),
            vec![0xAB, 0xCD, 0xAB, 0xCD, 0xAB, 0xCD]
        );
        // Partial tail when total is not a multiple of the pattern width.
        assert_eq!(tiled_fill(5, Some(&[1, 2])), vec![1, 2, 1, 2, 1]);
    }

    #[test]
    fn try_tiled_fill_matches_tiled_fill() {
        // The fallible variant must produce exactly the same bytes.
        let pattern: &[u8] = &[0xAB, 0xCD, 0xEF];
        let empty: &[u8] = &[];
        for total in [0usize, 1, 2, 3, 4, 7, 9] {
            for fv in [None, Some(empty), Some(pattern)] {
                assert_eq!(try_tiled_fill(total, fv).unwrap(), tiled_fill(total, fv));
            }
        }
    }

    #[test]
    fn try_tiled_fill_reports_impossible_size() {
        // A request that can never be satisfied comes back as an error
        // instead of aborting the process.
        assert!(try_tiled_fill(usize::MAX, None).is_err());
    }
}