rust_hdf5/format/
nbit_scaleoffset.rs

1//! Pure-Rust ports of the HDF5 N-bit (filter id 5) and Scale-offset
2//! (filter id 6) filters.
3//!
4//! Both ports are byte-exact with libhdf5's `H5Znbit.c` and
5//! `H5Zscaleoffset.c`. The bit-packing helpers mirror the C routines
6//! line-for-line so that crate-decoded chunks match libhdf5 element-exact.
7
8use crate::format::{FormatError, FormatResult};
9
10// ===========================================================================
11//  N-bit filter (H5Z_FILTER_NBIT, id 5)
12// ===========================================================================
13
// Datatype class codes used in the nbit parameter tree.
/// Atomic class: followed in the parameter list by `size, order, precision, offset`.
const NBIT_ATOMIC: u32 = 1;
/// Array class: followed by the array's total size and the class code of its
/// base type.
const NBIT_ARRAY: u32 = 2;
/// Compound class: followed by the compound size, the member count, and then
/// one `offset, class, ...` group per member.
const NBIT_COMPOUND: u32 = 3;
/// No-op class: followed by a size; all eight bits of every byte are packed.
const NBIT_NOOPTYPE: u32 = 4;
/// Little-endian order code. The atomic (de)compressors compare the order
/// field against this value only; any other value takes the big-endian path.
const NBIT_ORDER_LE: u32 = 0;
/// Big-endian order code; referenced by name only in tests, but kept here
/// so the parameter-tree decoding reads as a complete enumeration.
#[cfg_attr(not(test), allow(dead_code))]
const NBIT_ORDER_BE: u32 = 1;
24
/// Parameters describing one atomic element for the nbit packer.
///
/// The four fields are stored in this order in the filter's parameter list,
/// directly after the atomic class code.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct NbitAtomic {
    /// Element size in bytes (the packer works with `size * 8` bits).
    size: u32,
    /// Byte-order code; `NBIT_ORDER_LE` selects the little-endian byte walk.
    order: u32,
    /// Number of significant bits that are actually packed.
    precision: u32,
    /// Number of low-order bits skipped below the significant field.
    offset: u32,
}
33
/// Position inside a bit-packed buffer.
struct NbitCursor {
    /// Index of the byte currently being processed.
    j: usize,
    /// Number of bits of that byte not yet processed (8 for a fresh byte).
    buf_len: usize,
}

impl NbitCursor {
    /// Step to the following byte and mark all eight of its bits as available.
    fn next_byte(&mut self) {
        *self = Self {
            j: self.j + 1,
            buf_len: 8,
        };
    }
}
47
/// Mask with the lowest `n` bits set; any `n >= 32` yields all ones.
///
/// This is C's `~((unsigned)(~0) << n)` restricted to the low 32 bits, with
/// the out-of-range shift replaced by saturation.
fn mask_u32(n: usize) -> u32 {
    match n {
        0..=31 => !(u32::MAX << n),
        _ => u32::MAX,
    }
}
56
57/// Decompress one atomic byte, mirroring `H5Z__nbit_decompress_one_byte`.
58#[allow(clippy::too_many_arguments)]
59fn nbit_decompress_one_byte(
60    data: &mut [u8],
61    data_offset: usize,
62    k: u32,
63    begin_i: u32,
64    end_i: u32,
65    buffer: &[u8],
66    cur: &mut NbitCursor,
67    p: &NbitAtomic,
68    datatype_len: u32,
69) -> FormatResult<()> {
70    if cur.j >= buffer.len() {
71        return Err(FormatError::InvalidData("nbit: buffer too short".into()));
72    }
73    let mut val = buffer[cur.j];
74    let mut dat_offset: usize = 0;
75    let mut dat_len: usize;
76
77    if begin_i != end_i {
78        if k == begin_i {
79            dat_len = 8 - ((datatype_len - p.precision - p.offset) % 8) as usize;
80        } else if k == end_i {
81            dat_len = 8 - (p.offset % 8) as usize;
82            dat_offset = 8 - dat_len;
83        } else {
84            dat_len = 8;
85        }
86    } else {
87        dat_offset = (p.offset % 8) as usize;
88        dat_len = p.precision as usize;
89    }
90
91    let idx = data_offset + k as usize;
92    if cur.buf_len > dat_len {
93        data[idx] =
94            (((val >> (cur.buf_len - dat_len)) as u32 & mask_u32(dat_len)) << dat_offset) as u8;
95        cur.buf_len -= dat_len;
96    } else {
97        data[idx] =
98            (((val as u32 & mask_u32(cur.buf_len)) << (dat_len - cur.buf_len)) << dat_offset) as u8;
99        dat_len -= cur.buf_len;
100        cur.next_byte();
101        if dat_len == 0 {
102            return Ok(());
103        }
104        if cur.j >= buffer.len() {
105            return Err(FormatError::InvalidData("nbit: buffer too short".into()));
106        }
107        val = buffer[cur.j];
108        data[idx] |=
109            (((val >> (cur.buf_len - dat_len)) as u32 & mask_u32(dat_len)) << dat_offset) as u8;
110        cur.buf_len -= dat_len;
111    }
112    Ok(())
113}
114
115/// Compress one atomic byte, mirroring `H5Z__nbit_compress_one_byte`.
116#[allow(clippy::too_many_arguments)]
117fn nbit_compress_one_byte(
118    data: &[u8],
119    data_offset: usize,
120    k: u32,
121    begin_i: u32,
122    end_i: u32,
123    buffer: &mut [u8],
124    cur: &mut NbitCursor,
125    p: &NbitAtomic,
126    datatype_len: u32,
127) {
128    let mut val = data[data_offset + k as usize];
129    let mut dat_len: usize;
130
131    if begin_i != end_i {
132        if k == begin_i {
133            dat_len = 8 - ((datatype_len - p.precision - p.offset) % 8) as usize;
134        } else if k == end_i {
135            dat_len = 8 - (p.offset % 8) as usize;
136            val >>= 8 - dat_len;
137        } else {
138            dat_len = 8;
139        }
140    } else {
141        val >>= p.offset % 8;
142        dat_len = p.precision as usize;
143    }
144
145    if cur.buf_len > dat_len {
146        buffer[cur.j] |= ((val as u32 & mask_u32(dat_len)) << (cur.buf_len - dat_len)) as u8;
147        cur.buf_len -= dat_len;
148    } else {
149        buffer[cur.j] |= ((val as u32 >> (dat_len - cur.buf_len)) & mask_u32(cur.buf_len)) as u8;
150        dat_len -= cur.buf_len;
151        cur.next_byte();
152        if dat_len == 0 {
153            return;
154        }
155        buffer[cur.j] = ((val as u32 & mask_u32(dat_len)) << (cur.buf_len - dat_len)) as u8;
156        cur.buf_len -= dat_len;
157    }
158}
159
160/// Decompress one nooptype element, mirroring `H5Z__nbit_decompress_one_nooptype`.
161fn nbit_decompress_one_nooptype(
162    data: &mut [u8],
163    data_offset: usize,
164    buffer: &[u8],
165    cur: &mut NbitCursor,
166    size: u32,
167) -> FormatResult<()> {
168    for i in 0..size as usize {
169        if cur.j >= buffer.len() {
170            return Err(FormatError::InvalidData("nbit: buffer too short".into()));
171        }
172        let mut val = buffer[cur.j];
173        let mut dat_len: usize = 8;
174        data[data_offset + i] =
175            ((val as u32 & mask_u32(cur.buf_len)) << (dat_len - cur.buf_len)) as u8;
176        dat_len -= cur.buf_len;
177        cur.next_byte();
178        if dat_len == 0 {
179            continue;
180        }
181        if cur.j >= buffer.len() {
182            return Err(FormatError::InvalidData("nbit: buffer too short".into()));
183        }
184        val = buffer[cur.j];
185        data[data_offset + i] |=
186            ((val >> (cur.buf_len - dat_len)) as u32 & mask_u32(dat_len)) as u8;
187        cur.buf_len -= dat_len;
188    }
189    Ok(())
190}
191
192/// Compress one nooptype element, mirroring `H5Z__nbit_compress_one_nooptype`.
193fn nbit_compress_one_nooptype(
194    data: &[u8],
195    data_offset: usize,
196    buffer: &mut [u8],
197    cur: &mut NbitCursor,
198    size: u32,
199) {
200    for i in 0..size as usize {
201        let val = data[data_offset + i];
202        let mut dat_len: usize = 8;
203        buffer[cur.j] |= ((val as u32 >> (dat_len - cur.buf_len)) & mask_u32(cur.buf_len)) as u8;
204        dat_len -= cur.buf_len;
205        cur.next_byte();
206        if dat_len == 0 {
207            continue;
208        }
209        buffer[cur.j] = ((val as u32 & mask_u32(dat_len)) << (cur.buf_len - dat_len)) as u8;
210        cur.buf_len -= dat_len;
211    }
212}
213
214/// Decompress one atomic element, mirroring `H5Z__nbit_decompress_one_atomic`.
215fn nbit_decompress_one_atomic(
216    data: &mut [u8],
217    data_offset: usize,
218    buffer: &[u8],
219    cur: &mut NbitCursor,
220    p: &NbitAtomic,
221) -> FormatResult<()> {
222    let datatype_len = p.size * 8;
223    if p.order == NBIT_ORDER_LE {
224        let begin_i = if !(p.precision + p.offset).is_multiple_of(8) {
225            (p.precision + p.offset) / 8
226        } else {
227            (p.precision + p.offset) / 8 - 1
228        };
229        let end_i = p.offset / 8;
230        let mut k = begin_i as i64;
231        while k >= end_i as i64 {
232            nbit_decompress_one_byte(
233                data,
234                data_offset,
235                k as u32,
236                begin_i,
237                end_i,
238                buffer,
239                cur,
240                p,
241                datatype_len,
242            )?;
243            k -= 1;
244        }
245    } else {
246        let begin_i = (datatype_len - p.precision - p.offset) / 8;
247        let end_i = if !p.offset.is_multiple_of(8) {
248            (datatype_len - p.offset) / 8
249        } else {
250            (datatype_len - p.offset) / 8 - 1
251        };
252        for k in begin_i..=end_i {
253            nbit_decompress_one_byte(
254                data,
255                data_offset,
256                k,
257                begin_i,
258                end_i,
259                buffer,
260                cur,
261                p,
262                datatype_len,
263            )?;
264        }
265    }
266    Ok(())
267}
268
269/// Compress one atomic element, mirroring `H5Z__nbit_compress_one_atomic`.
270fn nbit_compress_one_atomic(
271    data: &[u8],
272    data_offset: usize,
273    buffer: &mut [u8],
274    cur: &mut NbitCursor,
275    p: &NbitAtomic,
276) {
277    let datatype_len = p.size * 8;
278    if p.order == NBIT_ORDER_LE {
279        let begin_i = if !(p.precision + p.offset).is_multiple_of(8) {
280            (p.precision + p.offset) / 8
281        } else {
282            (p.precision + p.offset) / 8 - 1
283        };
284        let end_i = p.offset / 8;
285        let mut k = begin_i as i64;
286        while k >= end_i as i64 {
287            nbit_compress_one_byte(
288                data,
289                data_offset,
290                k as u32,
291                begin_i,
292                end_i,
293                buffer,
294                cur,
295                p,
296                datatype_len,
297            );
298            k -= 1;
299        }
300    } else {
301        let begin_i = (datatype_len - p.precision - p.offset) / 8;
302        let end_i = if !p.offset.is_multiple_of(8) {
303            (datatype_len - p.offset) / 8
304        } else {
305            (datatype_len - p.offset) / 8 - 1
306        };
307        for k in begin_i..=end_i {
308            nbit_compress_one_byte(
309                data,
310                data_offset,
311                k,
312                begin_i,
313                end_i,
314                buffer,
315                cur,
316                p,
317                datatype_len,
318            );
319        }
320    }
321}
322
323/// Read an atomic parameter group starting at `parms[idx]` (after the class
324/// code has already been consumed): `size, order, precision, offset`.
325fn read_atomic(parms: &[u32], idx: &mut usize) -> FormatResult<NbitAtomic> {
326    if *idx + 4 > parms.len() {
327        return Err(FormatError::InvalidData(
328            "nbit: parameter list truncated".into(),
329        ));
330    }
331    let p = NbitAtomic {
332        size: parms[*idx],
333        order: parms[*idx + 1],
334        precision: parms[*idx + 2],
335        offset: parms[*idx + 3],
336    };
337    *idx += 4;
338    // Validate every atomic (top-level, array member, compound member) so
339    // the bit math below cannot overflow or panic on a crafted file.
340    let bits = p.size.checked_mul(8);
341    let span = p.precision.checked_add(p.offset);
342    match (bits, span) {
343        (Some(bits), Some(span))
344            if p.size > 0 && p.precision > 0 && p.precision <= bits && span <= bits => {}
345        _ => {
346            return Err(FormatError::InvalidData(format!(
347                "nbit: invalid atomic datatype (size={}, precision={}, offset={})",
348                p.size, p.precision, p.offset
349            )));
350        }
351    }
352    Ok(p)
353}
354
355/// Decompress one array element, mirroring `H5Z__nbit_decompress_one_array`.
356fn nbit_decompress_one_array(
357    data: &mut [u8],
358    data_offset: usize,
359    buffer: &[u8],
360    cur: &mut NbitCursor,
361    parms: &[u32],
362    parms_index: &mut usize,
363) -> FormatResult<()> {
364    if *parms_index + 2 > parms.len() {
365        return Err(FormatError::InvalidData(
366            "nbit: parameter list truncated".into(),
367        ));
368    }
369    let total_size = parms[*parms_index];
370    let base_class = parms[*parms_index + 1];
371    *parms_index += 2;
372
373    match base_class {
374        NBIT_ATOMIC => {
375            let p = read_atomic(parms, parms_index)?;
376            let n = total_size / p.size;
377            for i in 0..n as usize {
378                nbit_decompress_one_atomic(
379                    data,
380                    data_offset + i * p.size as usize,
381                    buffer,
382                    cur,
383                    &p,
384                )?;
385            }
386        }
387        NBIT_ARRAY => {
388            let base_size = parms[*parms_index];
389            let n = total_size / base_size;
390            let begin_index = *parms_index;
391            for i in 0..n as usize {
392                *parms_index = begin_index;
393                nbit_decompress_one_array(
394                    data,
395                    data_offset + i * base_size as usize,
396                    buffer,
397                    cur,
398                    parms,
399                    parms_index,
400                )?;
401            }
402        }
403        NBIT_COMPOUND => {
404            let base_size = parms[*parms_index];
405            let n = total_size / base_size;
406            let begin_index = *parms_index;
407            for i in 0..n as usize {
408                *parms_index = begin_index;
409                nbit_decompress_one_compound(
410                    data,
411                    data_offset + i * base_size as usize,
412                    buffer,
413                    cur,
414                    parms,
415                    parms_index,
416                )?;
417            }
418        }
419        NBIT_NOOPTYPE => {
420            *parms_index += 1; // skip size of no-op type
421            nbit_decompress_one_nooptype(data, data_offset, buffer, cur, total_size)?;
422        }
423        _ => {
424            return Err(FormatError::InvalidData(format!(
425                "nbit: bad base class {}",
426                base_class
427            )))
428        }
429    }
430    Ok(())
431}
432
433/// Decompress one compound element, mirroring `H5Z__nbit_decompress_one_compound`.
434fn nbit_decompress_one_compound(
435    data: &mut [u8],
436    data_offset: usize,
437    buffer: &[u8],
438    cur: &mut NbitCursor,
439    parms: &[u32],
440    parms_index: &mut usize,
441) -> FormatResult<()> {
442    if *parms_index + 2 > parms.len() {
443        return Err(FormatError::InvalidData(
444            "nbit: parameter list truncated".into(),
445        ));
446    }
447    *parms_index += 1; // skip compound size
448    let nmembers = parms[*parms_index];
449    *parms_index += 1;
450
451    for _ in 0..nmembers {
452        if *parms_index + 2 > parms.len() {
453            return Err(FormatError::InvalidData(
454                "nbit: parameter list truncated".into(),
455            ));
456        }
457        let member_offset = parms[*parms_index] as usize;
458        let member_class = parms[*parms_index + 1];
459        *parms_index += 2;
460
461        match member_class {
462            NBIT_ATOMIC => {
463                let p = read_atomic(parms, parms_index)?;
464                nbit_decompress_one_atomic(data, data_offset + member_offset, buffer, cur, &p)?;
465            }
466            NBIT_ARRAY => {
467                nbit_decompress_one_array(
468                    data,
469                    data_offset + member_offset,
470                    buffer,
471                    cur,
472                    parms,
473                    parms_index,
474                )?;
475            }
476            NBIT_COMPOUND => {
477                nbit_decompress_one_compound(
478                    data,
479                    data_offset + member_offset,
480                    buffer,
481                    cur,
482                    parms,
483                    parms_index,
484                )?;
485            }
486            NBIT_NOOPTYPE => {
487                let size = parms[*parms_index];
488                *parms_index += 1;
489                nbit_decompress_one_nooptype(data, data_offset + member_offset, buffer, cur, size)?;
490            }
491            _ => {
492                return Err(FormatError::InvalidData(format!(
493                    "nbit: bad member class {}",
494                    member_class
495                )))
496            }
497        }
498    }
499    Ok(())
500}
501
502/// Compress one array element, mirroring `H5Z__nbit_compress_one_array`.
503fn nbit_compress_one_array(
504    data: &[u8],
505    data_offset: usize,
506    buffer: &mut [u8],
507    cur: &mut NbitCursor,
508    parms: &[u32],
509    parms_index: &mut usize,
510) -> FormatResult<()> {
511    if *parms_index + 2 > parms.len() {
512        return Err(FormatError::InvalidData(
513            "nbit: parameter list truncated".into(),
514        ));
515    }
516    let total_size = parms[*parms_index];
517    let base_class = parms[*parms_index + 1];
518    *parms_index += 2;
519
520    match base_class {
521        NBIT_ATOMIC => {
522            let p = read_atomic(parms, parms_index)?;
523            let n = total_size / p.size;
524            for i in 0..n as usize {
525                nbit_compress_one_atomic(data, data_offset + i * p.size as usize, buffer, cur, &p);
526            }
527        }
528        NBIT_ARRAY => {
529            let base_size = parms[*parms_index];
530            let n = total_size / base_size;
531            let begin_index = *parms_index;
532            for i in 0..n as usize {
533                *parms_index = begin_index;
534                nbit_compress_one_array(
535                    data,
536                    data_offset + i * base_size as usize,
537                    buffer,
538                    cur,
539                    parms,
540                    parms_index,
541                )?;
542            }
543        }
544        NBIT_COMPOUND => {
545            let base_size = parms[*parms_index];
546            let n = total_size / base_size;
547            let begin_index = *parms_index;
548            for i in 0..n as usize {
549                *parms_index = begin_index;
550                nbit_compress_one_compound(
551                    data,
552                    data_offset + i * base_size as usize,
553                    buffer,
554                    cur,
555                    parms,
556                    parms_index,
557                )?;
558            }
559        }
560        NBIT_NOOPTYPE => {
561            *parms_index += 1;
562            nbit_compress_one_nooptype(data, data_offset, buffer, cur, total_size);
563        }
564        _ => {
565            return Err(FormatError::InvalidData(format!(
566                "nbit: bad base class {}",
567                base_class
568            )))
569        }
570    }
571    Ok(())
572}
573
574/// Compress one compound element, mirroring `H5Z__nbit_compress_one_compound`.
575fn nbit_compress_one_compound(
576    data: &[u8],
577    data_offset: usize,
578    buffer: &mut [u8],
579    cur: &mut NbitCursor,
580    parms: &[u32],
581    parms_index: &mut usize,
582) -> FormatResult<()> {
583    if *parms_index + 2 > parms.len() {
584        return Err(FormatError::InvalidData(
585            "nbit: parameter list truncated".into(),
586        ));
587    }
588    *parms_index += 1;
589    let nmembers = parms[*parms_index];
590    *parms_index += 1;
591
592    for _ in 0..nmembers {
593        if *parms_index + 2 > parms.len() {
594            return Err(FormatError::InvalidData(
595                "nbit: parameter list truncated".into(),
596            ));
597        }
598        let member_offset = parms[*parms_index] as usize;
599        let member_class = parms[*parms_index + 1];
600        *parms_index += 2;
601
602        match member_class {
603            NBIT_ATOMIC => {
604                let p = read_atomic(parms, parms_index)?;
605                nbit_compress_one_atomic(data, data_offset + member_offset, buffer, cur, &p);
606            }
607            NBIT_ARRAY => {
608                nbit_compress_one_array(
609                    data,
610                    data_offset + member_offset,
611                    buffer,
612                    cur,
613                    parms,
614                    parms_index,
615                )?;
616            }
617            NBIT_COMPOUND => {
618                nbit_compress_one_compound(
619                    data,
620                    data_offset + member_offset,
621                    buffer,
622                    cur,
623                    parms,
624                    parms_index,
625                )?;
626            }
627            NBIT_NOOPTYPE => {
628                let size = parms[*parms_index];
629                *parms_index += 1;
630                nbit_compress_one_nooptype(data, data_offset + member_offset, buffer, cur, size);
631            }
632            _ => {
633                return Err(FormatError::InvalidData(format!(
634                    "nbit: bad member class {}",
635                    member_class
636                )))
637            }
638        }
639    }
640    Ok(())
641}
642
643/// Apply the HDF5 N-bit filter.
644///
645/// `cd_values` follows `H5Znbit.c`'s schema:
646/// `[0]` = number of parameters, `[1]` = need-not-compress flag,
647/// `[2]` = element count, `[3..]` = the datatype parameter tree.
648///
649/// On compress, `data` is the raw element buffer; on decompress, `data`
650/// is the packed buffer and the result is the unpacked element buffer.
651pub fn apply_nbit(data: &[u8], cd_values: &[u32], compress: bool) -> FormatResult<Vec<u8>> {
652    if cd_values.len() < 4 {
653        return Err(FormatError::InvalidData("nbit: cd_values too short".into()));
654    }
655    // cd_values[1] != 0 -> data is full-precision, filter is a pass-through.
656    if cd_values[1] != 0 {
657        return Ok(data.to_vec());
658    }
659
660    let d_nelmts = cd_values[2] as usize;
661    let dtype_size = cd_values[4] as usize;
662    if dtype_size == 0 {
663        return Err(FormatError::InvalidData("nbit: zero datatype size".into()));
664    }
665    let unpacked_size = d_nelmts * dtype_size;
666
667    if compress {
668        if data.len() != unpacked_size {
669            return Err(FormatError::InvalidData(format!(
670                "nbit: input size {} != expected {}",
671                data.len(),
672                unpacked_size
673            )));
674        }
675        // Worst case the packed buffer is the same size as the unpacked one.
676        let mut buffer = vec![0u8; unpacked_size + 1];
677        let mut cur = NbitCursor { j: 0, buf_len: 8 };
678        match cd_values[3] {
679            NBIT_ATOMIC => {
680                let mut idx = 4;
681                let p = read_atomic(cd_values, &mut idx)?;
682                for i in 0..d_nelmts {
683                    nbit_compress_one_atomic(data, i * p.size as usize, &mut buffer, &mut cur, &p);
684                }
685            }
686            NBIT_ARRAY => {
687                let size = cd_values[4] as usize;
688                for i in 0..d_nelmts {
689                    let mut idx = 4;
690                    nbit_compress_one_array(
691                        data,
692                        i * size,
693                        &mut buffer,
694                        &mut cur,
695                        cd_values,
696                        &mut idx,
697                    )?;
698                }
699            }
700            NBIT_COMPOUND => {
701                let size = cd_values[4] as usize;
702                for i in 0..d_nelmts {
703                    let mut idx = 4;
704                    nbit_compress_one_compound(
705                        data,
706                        i * size,
707                        &mut buffer,
708                        &mut cur,
709                        cd_values,
710                        &mut idx,
711                    )?;
712                }
713            }
714            other => {
715                return Err(FormatError::InvalidData(format!(
716                    "nbit: unsupported top class {}",
717                    other
718                )))
719            }
720        }
721        // libhdf5 reports new_size + 1 (any hanging bits round up).
722        buffer.truncate(cur.j + 1);
723        Ok(buffer)
724    } else {
725        let mut out = vec![0u8; unpacked_size];
726        let mut cur = NbitCursor { j: 0, buf_len: 8 };
727        match cd_values[3] {
728            NBIT_ATOMIC => {
729                let mut idx = 4;
730                let p = read_atomic(cd_values, &mut idx)?;
731                if p.precision > p.size * 8 || p.precision + p.offset > p.size * 8 {
732                    return Err(FormatError::InvalidData(
733                        "nbit: invalid precision/offset".into(),
734                    ));
735                }
736                for i in 0..d_nelmts {
737                    nbit_decompress_one_atomic(&mut out, i * p.size as usize, data, &mut cur, &p)?;
738                }
739            }
740            NBIT_ARRAY => {
741                let size = cd_values[4] as usize;
742                for i in 0..d_nelmts {
743                    let mut idx = 4;
744                    nbit_decompress_one_array(
745                        &mut out,
746                        i * size,
747                        data,
748                        &mut cur,
749                        cd_values,
750                        &mut idx,
751                    )?;
752                }
753            }
754            NBIT_COMPOUND => {
755                let size = cd_values[4] as usize;
756                for i in 0..d_nelmts {
757                    let mut idx = 4;
758                    nbit_decompress_one_compound(
759                        &mut out,
760                        i * size,
761                        data,
762                        &mut cur,
763                        cd_values,
764                        &mut idx,
765                    )?;
766                }
767            }
768            other => {
769                return Err(FormatError::InvalidData(format!(
770                    "nbit: unsupported top class {}",
771                    other
772                )))
773            }
774        }
775        Ok(out)
776    }
777}
778
779// ===========================================================================
780//  Scale-offset filter (H5Z_FILTER_SCALEOFFSET, id 6)
781// ===========================================================================
782
// cd_values index layout (H5Zscaleoffset.c).
/// Index of the scale type.
const SO_PARM_SCALETYPE: usize = 0;
/// Index of the scale factor (reinterpreted as a signed value by the decoder).
const SO_PARM_SCALEFACTOR: usize = 1;
/// Index of the element count.
const SO_PARM_NELMTS: usize = 2;
/// Index of the datatype class (`SO_CLS_INTEGER` or `SO_CLS_FLOAT`).
const SO_PARM_CLASS: usize = 3;
/// Index of the datatype size in bytes.
const SO_PARM_SIZE: usize = 4;
/// Index of the datatype sign flag.
const SO_PARM_SIGN: usize = 5;
/// Index of the byte-order code (`SO_ORDER_LE` for little-endian).
const SO_PARM_ORDER: usize = 6;
/// Index of the fill-value-available flag (compared against `SO_FILL_DEFINED`).
const SO_PARM_FILAVAIL: usize = 7;
/// First cd_values index holding the (optional) packed fill value.
const SO_PARM_FILVAL: usize = 8;

/// Datatype class code for integer data.
const SO_CLS_INTEGER: u32 = 0;
/// Datatype class code for floating-point data.
const SO_CLS_FLOAT: u32 = 1;
/// Byte-order code for little-endian; any other value is handled as big-endian.
const SO_ORDER_LE: u32 = 0;
/// Value of the fill-value-available flag when a fill value is stored.
const SO_FILL_DEFINED: u32 = 1;
// Float scale type: 0 = variable-minimum-bits (D-scale); 1 = E-scale (unsupported).
const SO_FLOAT_DSCALE: u32 = 0;

/// 21-byte parameter header stored in front of every scale-offset chunk.
const SO_BUF_OFFSET: usize = 21;
804
805/// Decompress one scale-offset byte, mirroring
806/// `H5Z__scaleoffset_decompress_one_byte`.
807#[allow(clippy::too_many_arguments)]
808fn so_decompress_one_byte(
809    data: &mut [u8],
810    data_offset: usize,
811    k: u32,
812    begin_i: u32,
813    buffer: &[u8],
814    cur: &mut NbitCursor,
815    minbits: u32,
816    dtype_len: u32,
817) -> FormatResult<()> {
818    if cur.j >= buffer.len() {
819        return Err(FormatError::InvalidData(
820            "scaleoffset: buffer too short".into(),
821        ));
822    }
823    let mut val = buffer[cur.j];
824    let mut bits_to_copy: usize = if k == begin_i {
825        8 - ((dtype_len - minbits) % 8) as usize
826    } else {
827        8
828    };
829
830    let idx = data_offset + k as usize;
831    if cur.buf_len > bits_to_copy {
832        data[idx] = ((val >> (cur.buf_len - bits_to_copy)) as u32 & mask_u32(bits_to_copy)) as u8;
833        cur.buf_len -= bits_to_copy;
834    } else {
835        data[idx] = ((val as u32 & mask_u32(cur.buf_len)) << (bits_to_copy - cur.buf_len)) as u8;
836        bits_to_copy -= cur.buf_len;
837        cur.next_byte();
838        if bits_to_copy == 0 {
839            return Ok(());
840        }
841        if cur.j >= buffer.len() {
842            return Err(FormatError::InvalidData(
843                "scaleoffset: buffer too short".into(),
844            ));
845        }
846        val = buffer[cur.j];
847        data[idx] |= ((val >> (cur.buf_len - bits_to_copy)) as u32 & mask_u32(bits_to_copy)) as u8;
848        cur.buf_len -= bits_to_copy;
849    }
850    Ok(())
851}
852
853/// Decompress one scale-offset atomic element, mirroring
854/// `H5Z__scaleoffset_decompress_one_atomic`.
855fn so_decompress_one_atomic(
856    data: &mut [u8],
857    data_offset: usize,
858    buffer: &[u8],
859    cur: &mut NbitCursor,
860    size: u32,
861    minbits: u32,
862    order: u32,
863) -> FormatResult<()> {
864    let dtype_len = size * 8;
865    if order == SO_ORDER_LE {
866        let begin_i = size - 1 - (dtype_len - minbits) / 8;
867        let mut k = begin_i as i64;
868        while k >= 0 {
869            so_decompress_one_byte(
870                data,
871                data_offset,
872                k as u32,
873                begin_i,
874                buffer,
875                cur,
876                minbits,
877                dtype_len,
878            )?;
879            k -= 1;
880        }
881    } else {
882        let begin_i = (dtype_len - minbits) / 8;
883        for k in begin_i..=(size - 1) {
884            so_decompress_one_byte(
885                data,
886                data_offset,
887                k,
888                begin_i,
889                buffer,
890                cur,
891                minbits,
892                dtype_len,
893            )?;
894        }
895    }
896    Ok(())
897}
898
899/// Read a little-/big-endian integer of `size` bytes from `data` at `offset`.
900fn read_uint(data: &[u8], offset: usize, size: usize, order: u32) -> u64 {
901    let mut v: u64 = 0;
902    if order == SO_ORDER_LE {
903        for i in 0..size {
904            v |= (data[offset + i] as u64) << (i * 8);
905        }
906    } else {
907        for i in 0..size {
908            v = (v << 8) | data[offset + i] as u64;
909        }
910    }
911    v
912}
913
914/// Write a little-/big-endian integer of `size` bytes into `data` at `offset`.
915fn write_uint(data: &mut [u8], offset: usize, size: usize, order: u32, v: u64) {
916    if order == SO_ORDER_LE {
917        for i in 0..size {
918            data[offset + i] = (v >> (i * 8)) as u8;
919        }
920    } else {
921        for i in 0..size {
922            data[offset + i] = (v >> ((size - 1 - i) * 8)) as u8;
923        }
924    }
925}
926
927/// Reverse the HDF5 scale-offset filter (decompress only).
928///
929/// `cd_values` follows `H5Zscaleoffset.c`'s 20-entry schema. The output is
930/// the raw element buffer in the dataset datatype's byte order.
931pub fn reverse_scaleoffset(data: &[u8], cd_values: &[u32]) -> FormatResult<Vec<u8>> {
932    if cd_values.len() < 8 {
933        return Err(FormatError::InvalidData(
934            "scaleoffset: cd_values too short".into(),
935        ));
936    }
937    let scale_type = cd_values[SO_PARM_SCALETYPE];
938    let scale_factor = cd_values[SO_PARM_SCALEFACTOR] as i32;
939    let d_nelmts = cd_values[SO_PARM_NELMTS] as usize;
940    let dtype_class = cd_values[SO_PARM_CLASS];
941    let size = cd_values[SO_PARM_SIZE] as usize;
942    let dtype_sign = cd_values[SO_PARM_SIGN];
943    let order = cd_values[SO_PARM_ORDER];
944    let filavail = cd_values[SO_PARM_FILAVAIL];
945
946    if size == 0 || size > 8 {
947        return Err(FormatError::InvalidData(format!(
948            "scaleoffset: unsupported datatype size {}",
949            size
950        )));
951    }
952    // Reconstruct the packed fill value from cd_values[8..]. libhdf5 stores
953    // it 4 bytes per cd_value, least-significant cd_value first; each cd_value
954    // holds the bytes in the dataset datatype's byte order. We read it as a
955    // raw `size`-byte little-endian-composed value (correct for the common
956    // little-endian-dataset case h5py emits on x86/ARM).
957    let filval: u64 = if filavail == SO_FILL_DEFINED {
958        let mut v: u64 = 0;
959        let n_cd = size.div_ceil(4);
960        if cd_values.len() < SO_PARM_FILVAL + n_cd {
961            return Err(FormatError::InvalidData(
962                "scaleoffset: cd_values missing fill value".into(),
963            ));
964        }
965        for (w, cd) in cd_values[SO_PARM_FILVAL..SO_PARM_FILVAL + n_cd]
966            .iter()
967            .enumerate()
968        {
969            v |= (*cd as u64) << (w * 32);
970        }
971        if size < 8 {
972            v &= (1u64 << (size * 8)) - 1;
973        }
974        v
975    } else {
976        0
977    };
978    if dtype_class == SO_CLS_FLOAT && scale_type != SO_FLOAT_DSCALE {
979        return Err(FormatError::UnsupportedFeature(
980            "scaleoffset E-scaling method is not supported".into(),
981        ));
982    }
983
984    let size_out = d_nelmts * size;
985
986    // For integer types, scale_factor < 0 is reset to 0 by the library.
987    let int_scalefactor = if scale_factor < 0 { 0 } else { scale_factor };
988    if dtype_class == SO_CLS_INTEGER && int_scalefactor as usize == size * 8 {
989        // No processing: payload after the header is the raw data.
990        if data.len() < SO_BUF_OFFSET + size_out {
991            return Err(FormatError::InvalidData(
992                "scaleoffset: buffer too short".into(),
993            ));
994        }
995        return Ok(data[SO_BUF_OFFSET..SO_BUF_OFFSET + size_out].to_vec());
996    }
997
998    // Read minbits + minval from the 21-byte header (always little-endian).
999    if data.len() < SO_BUF_OFFSET {
1000        return Err(FormatError::InvalidData(
1001            "scaleoffset: buffer too short for header".into(),
1002        ));
1003    }
1004    let mut minbits: u32 = 0;
1005    for (i, &b) in data[..4].iter().enumerate() {
1006        minbits |= (b as u32) << (i * 8);
1007    }
1008    if minbits as usize > size * 8 {
1009        return Err(FormatError::InvalidData(
1010            "scaleoffset: minbits exceeds datatype size".into(),
1011        ));
1012    }
1013    let minval_size = std::cmp::min(8usize, data[4] as usize);
1014    let mut minval: u64 = 0;
1015    for i in 0..minval_size {
1016        minval |= (data[5 + i] as u64) << (i * 8);
1017    }
1018
1019    // Special case: full precision -> payload copied verbatim.
1020    if minbits as usize == size * 8 {
1021        if data.len() < SO_BUF_OFFSET + size_out {
1022            return Err(FormatError::InvalidData(
1023                "scaleoffset: buffer too short".into(),
1024            ));
1025        }
1026        return Ok(data[SO_BUF_OFFSET..SO_BUF_OFFSET + size_out].to_vec());
1027    }
1028
1029    let mut out = vec![0u8; size_out];
1030
1031    if minbits != 0 {
1032        if data.len() < SO_BUF_OFFSET {
1033            return Err(FormatError::InvalidData(
1034                "scaleoffset: buffer too short".into(),
1035            ));
1036        }
1037        let payload = &data[SO_BUF_OFFSET..];
1038        let mut cur = NbitCursor { j: 0, buf_len: 8 };
1039        for i in 0..d_nelmts {
1040            so_decompress_one_atomic(
1041                &mut out,
1042                i * size,
1043                payload,
1044                &mut cur,
1045                size as u32,
1046                minbits,
1047                order,
1048            )?;
1049        }
1050    }
1051    // minbits == 0: out stays all-zero (all elements identical, no fill value).
1052
1053    // Postprocess: add back minval (and apply float scaling).
1054    postdecompress(
1055        &mut out,
1056        d_nelmts,
1057        size,
1058        order,
1059        dtype_class,
1060        dtype_sign,
1061        minbits,
1062        minval,
1063        scale_factor,
1064        filavail == SO_FILL_DEFINED,
1065        filval,
1066    );
1067
1068    Ok(out)
1069}
1070
/// Sign-extend the low `size*8` bits of `v` to a full `i64`.
///
/// Bits of `v` above the element width are discarded before extension. A
/// `size` of 8 or more reinterprets `v` unchanged. A `size` of 0 selects no
/// bits at all and yields 0; without that guard the shift amount would be
/// 64, which overflows (a panic in debug builds).
fn sign_extend(v: u64, size: usize) -> i64 {
    match size {
        0 => 0,
        1..=7 => {
            // Move the element's sign bit to bit 63, then shift back
            // arithmetically so it is replicated into the upper bits.
            let shift = 64 - size * 8;
            ((v << shift) as i64) >> shift
        }
        _ => v as i64,
    }
}
1080
1081/// Postprocess decompressed scale-offset data.
1082#[allow(clippy::too_many_arguments)]
1083fn postdecompress(
1084    out: &mut [u8],
1085    d_nelmts: usize,
1086    size: usize,
1087    order: u32,
1088    dtype_class: u32,
1089    dtype_sign: u32,
1090    minbits: u32,
1091    minval: u64,
1092    scale_factor: i32,
1093    fill_defined: bool,
1094    filval: u64,
1095) {
1096    // Sentinel: a fully decompressed value equal to (1 << minbits) - 1 is
1097    // restored to the fill value rather than offset-added.
1098    let sentinel: u64 = if (minbits as usize) >= 64 {
1099        u64::MAX
1100    } else {
1101        (1u64 << minbits) - 1
1102    };
1103    let width_mask: u64 = if size >= 8 {
1104        u64::MAX
1105    } else {
1106        (1u64 << (size * 8)) - 1
1107    };
1108
1109    if dtype_class == SO_CLS_INTEGER {
1110        // buf[i] = (buf[i] == sentinel) ? filval : buf[i] + minval.
1111        for i in 0..d_nelmts {
1112            let off = i * size;
1113            let v = read_uint(out, off, size, order);
1114            let result = if fill_defined && v == sentinel {
1115                filval
1116            } else {
1117                v.wrapping_add(minval) & width_mask
1118            };
1119            write_uint(out, off, size, order, result);
1120        }
1121        let _ = dtype_sign;
1122    } else {
1123        // Float D-scale: value = (signed decompressed int) / 10^D + min,
1124        // where `min` reinterprets `minval`'s low bits as the float type.
1125        let d_val = scale_factor as f64;
1126        let divisor = 10f64.powf(d_val);
1127        if size == 4 {
1128            let min = f32::from_bits(minval as u32);
1129            let filval_f = f32::from_bits(filval as u32);
1130            for i in 0..d_nelmts {
1131                let off = i * size;
1132                let raw = read_uint(out, off, size, order);
1133                let val = if fill_defined && raw == sentinel {
1134                    filval_f
1135                } else {
1136                    (sign_extend(raw, size) as f32) / (divisor as f32) + min
1137                };
1138                write_uint(out, off, size, order, val.to_bits() as u64);
1139            }
1140        } else if size == 8 {
1141            let min = f64::from_bits(minval);
1142            let filval_f = f64::from_bits(filval);
1143            for i in 0..d_nelmts {
1144                let off = i * size;
1145                let raw = read_uint(out, off, size, order);
1146                if fill_defined && raw == sentinel {
1147                    write_uint(out, off, size, order, filval_f.to_bits());
1148                    continue;
1149                }
1150                let val = (sign_extend(raw, size) as f64) / divisor + min;
1151                write_uint(out, off, size, order, val.to_bits());
1152            }
1153        }
1154    }
1155}
1156
1157// ===========================================================================
1158//  Post-filter datatype conversion (H5T_convert equivalent)
1159// ===========================================================================
1160
1161use crate::format::messages::datatype::{ByteOrder, DatatypeMessage};
1162
1163/// True if `dt` is a standard IEEE-754 binary32/binary64 layout (the only
1164/// floating-point layouts the crate can faithfully reinterpret in place).
1165fn is_standard_ieee_float(dt: &DatatypeMessage) -> bool {
1166    match dt {
1167        DatatypeMessage::FloatingPoint {
1168            size,
1169            sign_location,
1170            bit_offset,
1171            bit_precision,
1172            exponent_location,
1173            exponent_size,
1174            mantissa_location,
1175            mantissa_size,
1176            exponent_bias,
1177            ..
1178        } => {
1179            let bits = *size * 8;
1180            let is_ieee32 = bits == 32
1181                && *bit_offset == 0
1182                && *bit_precision == 32
1183                && *sign_location == 31
1184                && *exponent_location == 23
1185                && *exponent_size == 8
1186                && *mantissa_location == 0
1187                && *mantissa_size == 23
1188                && *exponent_bias == 127;
1189            let is_ieee64 = bits == 64
1190                && *bit_offset == 0
1191                && *bit_precision == 64
1192                && *sign_location == 63
1193                && *exponent_location == 52
1194                && *exponent_size == 11
1195                && *mantissa_location == 0
1196                && *mantissa_size == 52
1197                && *exponent_bias == 1023;
1198            is_ieee32 || is_ieee64
1199        }
1200        _ => false,
1201    }
1202}
1203
1204/// True if the filter-pipeline / on-disk output for `dt` needs a post-filter
1205/// datatype conversion before the element values are usable.
1206///
1207/// For a `FixedPoint` datatype the filter pipeline output (or contiguous
1208/// on-disk bytes) carries the significant value in `bit_precision` bits
1209/// starting at `bit_offset`, with the rest zero-filled and the sign bit NOT
1210/// extended. libhdf5 fixes this up with a datatype conversion
1211/// (`H5T_convert`) after the filter pipeline; this returns true for any
1212/// such non-trivial layout.
1213///
1214/// It also returns true for a non-standard `FloatingPoint` layout, so the
1215/// caller routes it through [`apply_datatype_conversion`], which then
1216/// returns a clear error rather than silently yielding wrong data.
1217pub fn datatype_needs_bit_conversion(dt: &DatatypeMessage) -> bool {
1218    match dt {
1219        DatatypeMessage::FixedPoint {
1220            size,
1221            bit_offset,
1222            bit_precision,
1223            ..
1224        } => *bit_offset != 0 || (*bit_precision as u32) < *size * 8,
1225        DatatypeMessage::FloatingPoint { .. } => !is_standard_ieee_float(dt),
1226        _ => false,
1227    }
1228}
1229
1230/// Apply the post-filter datatype conversion in place to a fully-decoded
1231/// output buffer.
1232///
1233/// This mirrors libhdf5's `H5T_convert` step that runs AFTER the filter
1234/// pipeline. For a `FixedPoint` datatype with `bit_offset != 0` or
1235/// `bit_precision < size*8`, each `size`-byte element is rewritten so the
1236/// significant value occupies the whole element with bit offset 0:
1237///
1238///   1. interpret the element as an unsigned integer (respecting byte order),
1239///   2. shift right by `bit_offset`,
1240///   3. mask to `bit_precision` low bits,
1241///   4. sign-extend from bit `bit_precision-1` if the type is signed,
1242///   5. write the result back in the same byte order.
1243///
1244/// It is a strict no-op for ordinary full-width datatypes (and for any
1245/// non-`FixedPoint` class).
1246///
1247/// For `FloatingPoint` types with a non-standard bit layout that cannot be
1248/// faithfully reinterpreted, an error is returned rather than wrong data.
1249pub fn apply_datatype_conversion(buffer: &mut [u8], dt: &DatatypeMessage) -> FormatResult<()> {
1250    match dt {
1251        DatatypeMessage::FixedPoint {
1252            size,
1253            byte_order,
1254            signed,
1255            bit_offset,
1256            bit_precision,
1257        } => {
1258            let size = *size as usize;
1259            let precision = *bit_precision as usize;
1260            let offset = *bit_offset as usize;
1261
1262            // Full-width plain integer: nothing to do.
1263            if offset == 0 && precision == size * 8 {
1264                return Ok(());
1265            }
1266            if size == 0 || size > 8 {
1267                return Err(FormatError::InvalidData(format!(
1268                    "datatype conversion: unsupported FixedPoint size {size}"
1269                )));
1270            }
1271            if precision == 0 || offset + precision > size * 8 {
1272                return Err(FormatError::InvalidData(format!(
1273                    "datatype conversion: invalid bit layout (offset {offset}, \
1274                     precision {precision}, size {size})"
1275                )));
1276            }
1277            if !buffer.len().is_multiple_of(size) {
1278                return Err(FormatError::InvalidData(format!(
1279                    "datatype conversion: buffer length {} not a multiple of \
1280                     element size {size}",
1281                    buffer.len()
1282                )));
1283            }
1284
1285            let big_endian = matches!(byte_order, ByteOrder::BigEndian);
1286            let precision_mask: u64 = if precision == 64 {
1287                u64::MAX
1288            } else {
1289                (1u64 << precision) - 1
1290            };
1291            let sign_bit: u64 = 1u64 << (precision - 1);
1292
1293            for elem in buffer.chunks_exact_mut(size) {
1294                // Load element as a u64 in native value space.
1295                let mut raw: u64 = 0;
1296                if big_endian {
1297                    for &b in elem.iter() {
1298                        raw = (raw << 8) | b as u64;
1299                    }
1300                } else {
1301                    for (i, &b) in elem.iter().enumerate() {
1302                        raw |= (b as u64) << (8 * i);
1303                    }
1304                }
1305
1306                // Extract the significant bits.
1307                let mut value = (raw >> offset) & precision_mask;
1308
1309                // Sign-extend from bit `precision-1` when signed.
1310                if *signed && (value & sign_bit) != 0 {
1311                    value |= !precision_mask;
1312                }
1313
1314                // Store back in the same byte order, full element width.
1315                if big_endian {
1316                    for i in 0..size {
1317                        elem[size - 1 - i] = (value >> (8 * i)) as u8;
1318                    }
1319                } else {
1320                    for (i, b) in elem.iter_mut().enumerate() {
1321                        *b = (value >> (8 * i)) as u8;
1322                    }
1323                }
1324            }
1325            Ok(())
1326        }
1327        DatatypeMessage::FloatingPoint { .. } => {
1328            // Standard IEEE-754 layouts need no conversion. Anything else
1329            // cannot be faithfully reinterpreted here.
1330            if is_standard_ieee_float(dt) {
1331                Ok(())
1332            } else {
1333                Err(FormatError::InvalidData(
1334                    "datatype conversion: non-standard floating-point bit \
1335                     layout cannot be converted"
1336                        .into(),
1337                ))
1338            }
1339        }
1340        _ => Ok(()),
1341    }
1342}
1343
1344// ===========================================================================
1345//  Tests
1346// ===========================================================================
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble the nbit `cd_values` for an unsigned little-endian atomic
    /// integer.
    ///
    /// Layout: `[0]` nparms, `[1]` need_not_compress, `[2]` d_nelmts,
    /// `[3]` class, `[4]` size, `[5]` order, `[6]` precision, `[7]` offset.
    fn nbit_atomic_cd(d_nelmts: u32, size: u32, precision: u32, offset: u32) -> Vec<u32> {
        let full_precision = offset == 0 && precision == size * 8;
        vec![
            8,
            u32::from(full_precision),
            d_nelmts,
            NBIT_ATOMIC,
            size,
            NBIT_ORDER_LE,
            precision,
            offset,
        ]
    }

    #[test]
    fn nbit_roundtrip_u16_precision12() {
        // 16-bit storage, 12-bit precision, offset 0.
        let raw: Vec<u8> = (0..40u16)
            .flat_map(|i| ((i * 71) & 0x0FFF).to_le_bytes())
            .collect();
        let cd = nbit_atomic_cd(40, 2, 12, 0);
        let packed = apply_nbit(&raw, &cd, true).unwrap();
        assert!(packed.len() <= raw.len());
        let unpacked = apply_nbit(&packed, &cd, false).unwrap();
        assert_eq!(unpacked, raw);
    }

    #[test]
    fn nbit_roundtrip_u32_precision20_offset4() {
        // 32-bit storage, 20 significant bits placed at bit offset 4.
        let raw: Vec<u8> = (0..32u32)
            .flat_map(|i| (((i * 9999) & 0xFFFFF) << 4).to_le_bytes())
            .collect();
        let cd = nbit_atomic_cd(32, 4, 20, 4);
        let packed = apply_nbit(&raw, &cd, true).unwrap();
        let unpacked = apply_nbit(&packed, &cd, false).unwrap();
        assert_eq!(unpacked, raw);
    }

    #[test]
    fn nbit_passthrough_full_precision() {
        // Full precision sets need_not_compress, so both directions copy.
        let raw: Vec<u8> = (0..64).collect();
        let cd = nbit_atomic_cd(16, 4, 32, 0);
        let packed = apply_nbit(&raw, &cd, true).unwrap();
        assert_eq!(packed, raw);
        let unpacked = apply_nbit(&packed, &cd, false).unwrap();
        assert_eq!(unpacked, raw);
    }

    #[test]
    fn nbit_roundtrip_big_endian() {
        // 16-bit big-endian storage, 10-bit precision, offset 0.
        let raw: Vec<u8> = (0..24u16)
            .flat_map(|i| ((i * 53) & 0x03FF).to_be_bytes())
            .collect();
        let mut cd = nbit_atomic_cd(24, 2, 10, 0);
        cd[5] = NBIT_ORDER_BE;
        let packed = apply_nbit(&raw, &cd, true).unwrap();
        let unpacked = apply_nbit(&packed, &cd, false).unwrap();
        assert_eq!(unpacked, raw);
    }

    // ---------------------------------------------------------------
    //  Post-filter datatype conversion
    // ---------------------------------------------------------------

    /// Little-endian `FixedPoint` datatype with the given bit layout.
    fn fixed(size: u32, signed: bool, offset: u16, precision: u16) -> DatatypeMessage {
        DatatypeMessage::FixedPoint {
            size,
            byte_order: ByteOrder::LittleEndian,
            signed,
            bit_offset: offset,
            bit_precision: precision,
        }
    }

    /// Run the conversion on a copy of `bytes` and return the result.
    fn converted(dt: &DatatypeMessage, bytes: &[u8]) -> Vec<u8> {
        let mut buf = bytes.to_vec();
        apply_datatype_conversion(&mut buf, dt).unwrap();
        buf
    }

    #[test]
    fn conversion_noop_for_full_width_types() {
        // 32-bit unsigned, offset 0, precision 32 -> plain integer, no-op.
        let dt = fixed(4, false, 0, 32);
        assert!(!datatype_needs_bit_conversion(&dt));
        let input = [0x78, 0x56, 0x34, 0x12, 0xFF, 0xFF, 0xFF, 0xFF];
        assert_eq!(converted(&dt, &input), input);
    }

    #[test]
    fn conversion_noop_for_non_numeric_types() {
        let dt = DatatypeMessage::fixed_string(8);
        assert!(!datatype_needs_bit_conversion(&dt));
        let input = b"hello!!\0";
        assert_eq!(converted(&dt, input), input);
    }

    #[test]
    fn conversion_unsigned_offset_shifts_right() {
        // u16, bit_offset 3, precision 10: the value lives in bits [3,13).
        // 0x2A5 placed at offset 3 is 0x2A5 << 3 = 0x1528.
        let dt = fixed(2, false, 3, 10);
        assert!(datatype_needs_bit_conversion(&dt));
        let out = converted(&dt, &0x1528u16.to_le_bytes());
        assert_eq!(u16::from_le_bytes([out[0], out[1]]), 0x2A5);
    }

    #[test]
    fn conversion_signed_negative_sign_extends() {
        // i16, bit_offset 4, precision 8. -3 in 8-bit two's complement is
        // 0xFD; placed at offset 4 that is 0xFD << 4 = 0xFD0.
        let dt = fixed(2, true, 4, 8);
        let out = converted(&dt, &0x0FD0u16.to_le_bytes());
        assert_eq!(i16::from_le_bytes([out[0], out[1]]), -3);
    }

    #[test]
    fn conversion_signed_positive_stays_positive() {
        // i16, bit_offset 4, precision 8. +5 at offset 4 is 0x050.
        let dt = fixed(2, true, 4, 8);
        let out = converted(&dt, &0x0050u16.to_le_bytes());
        assert_eq!(i16::from_le_bytes([out[0], out[1]]), 5);
    }

    #[test]
    fn conversion_reduced_precision_offset_zero() {
        // i32, bit_offset 0, precision 20 is still non-trivial because the
        // precision is below size*8. -1 in 20 bits is 0xFFFFF.
        let dt = fixed(4, true, 0, 20);
        assert!(datatype_needs_bit_conversion(&dt));
        let out = converted(&dt, &0x000F_FFFFu32.to_le_bytes());
        assert_eq!(i32::from_le_bytes(out.try_into().unwrap()), -1);
    }

    #[test]
    fn conversion_big_endian_signed() {
        // i16, BE, bit_offset 4, precision 8, value -3.
        let dt = DatatypeMessage::FixedPoint {
            size: 2,
            byte_order: ByteOrder::BigEndian,
            signed: true,
            bit_offset: 4,
            bit_precision: 8,
        };
        let out = converted(&dt, &0x0FD0u16.to_be_bytes());
        assert_eq!(i16::from_be_bytes([out[0], out[1]]), -3);
    }

    #[test]
    fn conversion_multiple_elements() {
        // u32, bit_offset 5, precision 16. Three elements.
        let dt = fixed(4, false, 5, 16);
        let vals: [u32; 3] = [0x1234, 0xABCD, 0x0001];
        let input: Vec<u8> = vals.iter().flat_map(|v| (v << 5).to_le_bytes()).collect();
        let out = converted(&dt, &input);
        for (i, expected) in vals.iter().enumerate() {
            let got = u32::from_le_bytes(out[i * 4..i * 4 + 4].try_into().unwrap());
            assert_eq!(got, *expected);
        }
    }

    #[test]
    fn conversion_rejects_non_standard_float() {
        // A float with a non-IEEE bit layout must error, not corrupt data.
        let dt = DatatypeMessage::FloatingPoint {
            size: 4,
            byte_order: ByteOrder::LittleEndian,
            sign_location: 30,
            bit_offset: 1,
            bit_precision: 31,
            exponent_location: 22,
            exponent_size: 8,
            mantissa_location: 0,
            mantissa_size: 22,
            exponent_bias: 127,
        };
        assert!(datatype_needs_bit_conversion(&dt));
        let mut buf = vec![0u8; 4];
        assert!(apply_datatype_conversion(&mut buf, &dt).is_err());
    }

    #[test]
    fn conversion_standard_float_is_noop() {
        let dt = DatatypeMessage::f64_type();
        assert!(!datatype_needs_bit_conversion(&dt));
        let input = 12.5f64.to_le_bytes();
        assert_eq!(converted(&dt, &input), input);
    }

    #[test]
    fn conversion_rejects_bad_buffer_length() {
        // Five bytes cannot hold a whole number of 4-byte elements.
        let dt = fixed(4, false, 3, 16);
        let mut buf = vec![0u8; 5];
        assert!(apply_datatype_conversion(&mut buf, &dt).is_err());
    }
}
rust_hdf5/format/nbit_scaleoffset.rs

rust_hdf5/format/
nbit_scaleoffset.rs