Skip to main content

hdf5_reader/messages/
filter_pipeline.rs

1//! HDF5 Filter Pipeline message (type 0x000B).
2//!
3//! Describes the sequence of filters applied to chunked data (e.g. deflate,
4//! shuffle, fletcher32). Each filter has an ID, optional name, and optional
5//! client data parameters.
6
7use crate::error::{Error, Result};
8use crate::io::Cursor;
9
// Well-known filter IDs.
//
// Each constant is the numeric value stored in a filter description's `id`
// field for one of the predefined HDF5 filters.

/// Filter ID of the deflate compression filter.
pub const FILTER_DEFLATE: u16 = 1;
/// Filter ID of the shuffle filter.
pub const FILTER_SHUFFLE: u16 = 2;
/// Filter ID of the Fletcher32 checksum filter.
pub const FILTER_FLETCHER32: u16 = 3;
/// Filter ID of the szip compression filter.
pub const FILTER_SZIP: u16 = 4;
/// Filter ID of the N-bit filter.
pub const FILTER_NBIT: u16 = 5;
/// Filter ID of the scale-offset filter.
pub const FILTER_SCALEOFFSET: u16 = 6;
17
/// A single filter in the pipeline.
///
/// Implements `PartialEq`/`Eq` so parsed descriptions can be compared
/// directly (for example in tests) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilterDescription {
    /// Filter identification number.
    pub id: u16,
    /// Optional filter name; `None` when the message stores no name for this
    /// filter (as is the case for well-known filters in v2 messages).
    pub name: Option<String>,
    /// Client data parameters, in the order they appear in the message.
    pub client_data: Vec<u32>,
}

/// Parsed filter pipeline message.
///
/// Two messages compare equal when they hold equal filters in the same order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FilterPipelineMessage {
    /// Filter descriptions in the order they were read from the message.
    pub filters: Vec<FilterDescription>,
}
34
35/// Parse a filter pipeline message.
36pub fn parse(
37    cursor: &mut Cursor<'_>,
38    _offset_size: u8,
39    _length_size: u8,
40    msg_size: usize,
41) -> Result<FilterPipelineMessage> {
42    let start = cursor.position();
43    let version = cursor.read_u8()?;
44
45    match version {
46        1 => parse_v1(cursor, start, msg_size),
47        2 => parse_v2(cursor, start, msg_size),
48        v => Err(Error::UnsupportedFilterPipelineVersion(v)),
49    }
50}
51
52fn parse_v1(cursor: &mut Cursor<'_>, start: u64, msg_size: usize) -> Result<FilterPipelineMessage> {
53    let n_filters = cursor.read_u8()? as usize;
54    let _reserved = cursor.read_bytes(6)?;
55
56    let mut filters = Vec::with_capacity(n_filters);
57    for _ in 0..n_filters {
58        let id = cursor.read_u16_le()?;
59        let name_len = cursor.read_u16_le()? as usize;
60        let _flags = cursor.read_u16_le()?;
61        let n_client_data = cursor.read_u16_le()? as usize;
62
63        let name = if name_len > 0 {
64            let s = cursor.read_fixed_string(name_len)?;
65            // Pad to 8-byte boundary
66            let padded = (name_len + 7) & !7;
67            if padded > name_len {
68                cursor.skip(padded - name_len)?;
69            }
70            Some(s)
71        } else {
72            None
73        };
74
75        let mut client_data = Vec::with_capacity(n_client_data);
76        for _ in 0..n_client_data {
77            client_data.push(cursor.read_u32_le()?);
78        }
79        // Pad client data to even count (v1 requires padding to 8 bytes)
80        if n_client_data % 2 != 0 {
81            cursor.skip(4)?;
82        }
83
84        filters.push(FilterDescription {
85            id,
86            name,
87            client_data,
88        });
89    }
90
91    let consumed = (cursor.position() - start) as usize;
92    if consumed < msg_size {
93        cursor.skip(msg_size - consumed)?;
94    }
95
96    Ok(FilterPipelineMessage { filters })
97}
98
99fn parse_v2(cursor: &mut Cursor<'_>, start: u64, msg_size: usize) -> Result<FilterPipelineMessage> {
100    let n_filters = cursor.read_u8()? as usize;
101
102    let mut filters = Vec::with_capacity(n_filters);
103    for _ in 0..n_filters {
104        let id = cursor.read_u16_le()?;
105
106        // In v2, user-defined filters (id >= 256) carry a name length field
107        // before flags. The actual name bytes come after n_client_data.
108        let name_len = if id >= 256 {
109            cursor.read_u16_le()? as usize
110        } else {
111            0
112        };
113
114        let _flags = cursor.read_u16_le()?;
115        let n_client_data = cursor.read_u16_le()? as usize;
116
117        // Name bytes (only for user-defined filters)
118        let name = if name_len > 0 {
119            Some(cursor.read_fixed_string(name_len)?)
120        } else {
121            None
122        };
123
124        let mut client_data = Vec::with_capacity(n_client_data);
125        for _ in 0..n_client_data {
126            client_data.push(cursor.read_u32_le()?);
127        }
128
129        filters.push(FilterDescription {
130            id,
131            name,
132            client_data,
133        });
134    }
135
136    let consumed = (cursor.position() - start) as usize;
137    if consumed < msg_size {
138        cursor.skip(msg_size - consumed)?;
139    }
140
141    Ok(FilterPipelineMessage { filters })
142}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// A v2 message holding a single deflate filter parses into one unnamed
    /// filter whose only client data value is the compression level.
    #[test]
    fn test_parse_v2_deflate() {
        let compression_level: u32 = 6;

        // Header: version 2, one filter.
        let mut data: Vec<u8> = vec![0x02, 0x01];
        // Filter record. Deflate is well-known (id < 256), so there is no
        // name-length field: id, flags, client data count, then the values.
        data.extend_from_slice(&FILTER_DEFLATE.to_le_bytes());
        data.extend_from_slice(&0u16.to_le_bytes());
        data.extend_from_slice(&1u16.to_le_bytes());
        data.extend_from_slice(&compression_level.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();

        assert_eq!(msg.filters.len(), 1);
        let filter = &msg.filters[0];
        assert_eq!(filter.id, FILTER_DEFLATE);
        assert!(filter.name.is_none());
        assert_eq!(filter.client_data, vec![compression_level]);
    }
}
168}