numpress_rs/
lib.rs

1//! Numpress utility.
2//!
3//! A pure rust implementation of [`ms-numpress`], a fast,
4//! minimally lossy compression algorithm for mass spectrometry data.
5//!
6//! # Additional Information
7//!
8//! The API makes extensive use of unsafe Rust features, and therefore
9//! cannot guarantee low-level memory safety. Use at your own risk.
10//!
11//! [`ms-numpress`]: https://github.com/ms-numpress/ms-numpress
12
13#![cfg_attr(not(feature = "std"), no_std)]
14#![cfg_attr(not(feature = "std"), feature(alloc))]
15#![cfg_attr(not(feature = "std"), feature(core_intrinsics))]
16
17#[cfg(not(feature = "std"))]
18#[allow(unused_imports)]
19#[macro_use]
20extern crate alloc;
21
22// FEATURES
23
24/// Facade around the core features for name mangling.
25mod sealed {
26    #[cfg(not(feature = "std"))]
27    pub use core::*;
28
29    #[cfg(feature = "std")]
30    pub use std::*;
31}
32
33use sealed::fmt::{Display, Formatter, Result as FmtResult};
34use sealed::result::Result as StdResult;
35
36#[cfg(feature = "std")]
37use sealed::error::Error as StdError;
38
39#[cfg(not(feature = "std"))]
40pub use alloc::vec::Vec;
41
42#[cfg(test)]
43extern crate rand;
44
45#[cfg(test)]
46#[macro_use]
47extern crate approx;
48
49// INTRINSICS
50
51/// `f64.abs()` feature for `no_std`
52#[cfg(not(feature = "std"))]
53#[inline(always)]
54fn abs(f: f64) -> f64 {
55    unsafe { core::intrinsics::fabsf64(f) }
56}
57
58/// `f64.ceil()` feature for `no_std`
59#[cfg(not(feature = "std"))]
60#[inline(always)]
61fn ceil(f: f64) -> f64 {
62    unsafe { core::intrinsics::ceilf64(f) }
63}
64
65/// `f64.floor()` feature for `no_std`
66#[cfg(not(feature = "std"))]
67#[inline(always)]
68fn floor(f: f64) -> f64 {
69    unsafe { core::intrinsics::floorf64(f) }
70}
71
72/// `f64.abs()` feature for `std`
73#[cfg(feature = "std")]
74#[inline(always)]
75fn abs(f: f64) -> f64 {
76    f.abs()
77}
78
79/// `f64.ceil()` feature for `std`
80#[cfg(feature = "std")]
81#[inline(always)]
82fn ceil(f: f64) -> f64 {
83    f.ceil()
84}
85
86/// `f64.floor()` feature for `std`
87#[cfg(feature = "std")]
88#[inline(always)]
89fn floor(f: f64) -> f64 {
90    f.floor()
91}
92
93// ERROR
94
95/// Type of error encountered during compression or decompression.
96#[derive(Debug, Copy, Clone, Eq, PartialEq)]
97pub enum ErrorKind {
98    /// Encoding or decoding fails due to corrupt input data.
99    CorruptInputData,
100    /// Next number to compress overflows `i64` type.
101    OverflowError,
102    /// Number is out-of-range of `[i32::min_value(), i32::max_value()]`.
103    OutOfRange,
104}
105
106pub trait AsFloat64: Copy {
107    fn as_(&self) -> f64;
108}
109
110
111macro_rules! impl_as_f64 {
112    ($tp:ty) => {
113        impl AsFloat64 for $tp {
114            fn as_(&self) -> f64 {
115                (*self) as f64
116            }
117        }
118    };
119}
120
121impl_as_f64!(f64);
122impl_as_f64!(f32);
123impl_as_f64!(i32);
124impl_as_f64!(i64);
125impl_as_f64!(u32);
126impl_as_f64!(u64);
127
128
129/// Custom error for Numpress compression.
130#[derive(Debug, Copy, Clone, Eq, PartialEq)]
131pub struct Error(ErrorKind);
132
133impl From<ErrorKind> for Error {
134    fn from(kind: ErrorKind) -> Self {
135        Error(kind)
136    }
137}
138
139/// Implied method to generate the description.
140macro_rules! description_impl {
141    ($kind:expr) => {
142        match $kind {
143            ErrorKind::CorruptInputData => "corrupt input data.",
144            ErrorKind::OverflowError => "next number overflows.",
145            ErrorKind::OutOfRange => {
146                "cannot encode number outside of [i32::min_value(), i32::max_value()]."
147            }
148        }
149    };
150}
151
152impl Error {
153    /// Get error type.
154    pub fn kind(&self) -> &ErrorKind {
155        &self.0
156    }
157
158    #[cfg(not(feature = "std"))]
159    fn description(&self) -> &'static str {
160        description_impl!(self.kind())
161    }
162}
163
164impl Display for Error {
165    fn fmt(&self, f: &mut Formatter) -> FmtResult {
166        write!(f, "Numpress error: {}", self.to_string())
167    }
168}
169
170#[cfg(feature = "std")]
171impl StdError for Error {
172    fn description(&self) -> &'static str {
173        description_impl!(self.kind())
174    }
175
176    fn cause(&self) -> Option<&dyn StdError> {
177        None
178    }
179}
180
181/// Specialized result for Numpress operations.
182pub type Result<T> = StdResult<T, Error>;
183
184pub mod low_level {
185
186    use super::sealed::mem::transmute;
187    use super::{abs, ceil, floor, ErrorKind, Result};
188
189    // FIXED POINT
190
191    /// Encode fixed point, disabling all spurious bounds checking for performance.
192    ///
193    /// Encodes the fixed point into `dst`.
194    ///
195    /// * `fixed_point`     - Floating point to encode.
196    /// * `dst`             - Destination buffer with at least 8 elements.
197    pub(crate) unsafe fn encode_fixed_point(fixed_point: f64, dst: *mut u8) {
198        let fp: [u8; 8] = transmute(fixed_point);
199        for i in 0..8 {
200            #[cfg(target_endian = "big")]
201            {
202                *dst.add(i) = *fp.get_unchecked(i);
203            }
204            #[cfg(target_endian = "little")]
205            {
206                *dst.add(i) = *fp.get_unchecked(7 - i);
207            }
208        }
209    }
210
211    /// Decode fixed point, disabling all spurious bounds checking for performance.
212    ///
213    /// Decodes the data into an f64.
214    ///
215    /// * `data`            - Input buffer with at least 8 elements.
216    pub(crate) unsafe fn decode_fixed_point(data: *const u8) -> f64 {
217        let mut fp: [u8; 8] = [0; 8];
218        for i in 0..8 {
219            #[cfg(target_endian = "big")]
220            {
221                *fp.get_unchecked_mut(i) = *data.add(i);
222            }
223            #[cfg(target_endian = "little")]
224            {
225                *fp.get_unchecked_mut(i) = *data.add(7 - i);
226            }
227        }
228
229        transmute(fp)
230    }
231
232    // INT
233
234    /// Encodes the int x as a number of half bytes in res.
235    /// res_length is incremented by the number of half bytes,
236    /// which will be 1 <= n <= 9.
237    ///
238    /// This code cannot overflow, due to the use of multiplication in values
239    /// <= 8, only right bit shifts (>>).
240    pub(crate) unsafe fn encode_int(x: u32, res: *mut u8, res_length: *mut usize) {
241        // get the bit pattern of input
242        let mut m: u32;
243
244        const MASK: u32 = 0xf0000000;
245        let init = x & MASK;
246
247        if init == 0 {
248            let mut l: u8 = 8;
249            for i in 0..8 {
250                m = MASK >> (4 * i);
251                if (x & m) != 0 {
252                    l = i;
253                    break;
254                }
255            }
256            *res = l;
257            for i in l..8 {
258                let xi = x >> (4 * (i - l));
259                let ri = res.add(1 + (i as usize) - (l as usize));
260                *ri = xi as u8;
261            }
262            *res_length += 1 + 8 - (l as usize);
263        } else if init == MASK {
264            let mut l: u8 = 7;
265            for i in 0..8 {
266                m = MASK >> (4 * i);
267                if (x & m) != m {
268                    l = i;
269                    break;
270                }
271            }
272            *res = l + 8;
273            for i in l..8 {
274                let xi = x >> (4 * (i - l));
275                let ri = res.add(1 + (i as usize) - (l as usize));
276                *ri = xi as u8;
277            }
278            *res_length += 1 + 8 - (l as usize);
279        } else {
280            *res = 0;
281            for i in 0..8 {
282                let xi = x >> (4 * i);
283                *res.add(i + 1) = xi as u8;
284            }
285            *res_length += 9;
286        }
287    }
288
289    /// Decodes an int from the half bytes in bp.
290    ///
291    /// Lossless reverse of `encode_int`.
292    pub(crate) unsafe fn decode_int(
293        data: *const u8,
294        di: *mut usize,
295        max_di: usize,
296        half: *mut usize,
297        res: *mut u32,
298    ) -> Result<()> {
299        let n: usize;
300        let mask: u32;
301        let mut m: u32;
302        let head: u8;
303        let mut hb: u8;
304
305        if *half == 0 {
306            let dix = *data.add(*di) >> 4;
307            head = dix as u8;
308        } else {
309            let dix = *data.add(*di) & 0xf;
310            head = dix as u8;
311            *di += 1;
312        }
313
314        *half = 1 - (*half);
315        *res = 0;
316
317        if head <= 8 {
318            n = head as usize;
319        } else {
320            // leading ones, fill n half bytes in res
321            n = (head - 8) as usize;
322            mask = 0xf0000000;
323            for i in 0..n {
324                m = mask >> (4 * i);
325                *res = *res | m;
326            }
327        }
328
329        if n == 8 {
330            return Ok(());
331        }
332
333        if *di + ((8 - n) - (1 - *half)) / 2 >= max_di {
334            return Err(ErrorKind::CorruptInputData.into());
335        }
336
337        for i in n..8 {
338            if *half == 0 {
339                let dix = *data.add(*di) >> 4;
340                hb = dix as u8;
341            } else {
342                let dix = *data.add(*di) & 0xf;
343                hb = dix as u8;
344                *di += 1;
345            }
346            let hb32 = hb as u32;
347            *res = *res | (hb32 << ((i - n) * 4));
348            *half = 1 - (*half);
349        }
350
351        Ok(())
352    }
353
354    // ENCODE/DECODE
355
356    /// Encodes double array using lossy conversion with value prediction.
357    ///
358    /// The resulting binary is maximally 8 + size * 5 bytes, but much less if the
359    /// data is reasonably smooth on the first order.
360    ///
361    /// This encoding is suitable for typical m/z or retention time binary arrays.
362    /// On a test set, the encoding was empirically show to be accurate to at
363    /// least 0.002 ppm.
364    ///
365    /// Returns the number of encoded bytes.
366    ///
367    /// * `data`        - Pointer to double array to be encoded
368    /// * `size`        - Size of double array
369    /// * `result`      - Pointer to resulting byte buffer
370    /// * `scaling`     - The scaling factor used for getting the fixed point repr.
371    pub unsafe fn encode_linear(
372        data: *const f64,
373        data_size: usize,
374        result: *mut u8,
375        scaling: f64,
376    ) -> Result<usize> {
377        let mut ints: [i64; 3] = [0; 3];
378        let mut extrapol: i64;
379
380        encode_fixed_point(scaling, result);
381
382        if data_size == 0 {
383            return Ok(8);
384        }
385
386        ints[1] = (*data * scaling + 0.5) as i64;
387        for i in 0..4 {
388            let di = result.add(8 + i);
389            let xi = (ints[1] >> (i * 8)) & 0xff;
390            *di = xi as u8;
391        }
392
393        if data_size == 1 {
394            return Ok(12);
395        }
396
397        ints[2] = (*data.add(1) * scaling + 0.5) as i64;
398        for i in 0..4 {
399            let di = result.add(12 + i);
400            let xi = (ints[2] >> (i * 8)) & 0xff;
401            *di = xi as u8;
402        }
403
404        let mut half_byte_count: usize = 0;
405        let mut ri: usize = 16;
406        let mut half_bytes: [u8; 10] = [0; 10];
407        const I32_MIN: i64 = i32::min_value() as i64;
408        const I32_MAX: i64 = i32::max_value() as i64;
409        let mut diff: i32;
410
411        for i in 2..data_size {
412            ints[0] = ints[1];
413            ints[1] = ints[2];
414            if (*data.add(i) * scaling + 0.5) as i64 > i64::max_value() {
415                return Err(ErrorKind::OverflowError.into());
416            }
417
418            ints[2] = ((*data.add(i)) * scaling + 0.5) as i64;
419            extrapol = ints[1] + (ints[1] - ints[0]);
420
421            if (ints[2] - extrapol > I32_MAX) || (ints[2] - extrapol < I32_MIN) {
422                return Err(ErrorKind::OutOfRange.into());
423            }
424
425            diff = (ints[2] - extrapol) as i32;
426            encode_int(
427                diff as u32,
428                &mut half_bytes[half_byte_count],
429                &mut half_byte_count,
430            );
431
432            for hbi in (1..half_byte_count).step_by(2) {
433                let di = result.add(ri);
434                let xi = (half_bytes[hbi - 1] << 4) | (half_bytes[hbi] & 0xf);
435                *di = xi as u8;
436                ri += 1;
437            }
438
439            if half_byte_count % 2 != 0 {
440                half_bytes[0] = half_bytes[half_byte_count - 1];
441                half_byte_count = 1;
442            } else {
443                half_byte_count = 0;
444            }
445        }
446
447        if half_byte_count == 1 {
448            let di = result.add(ri);
449            let xi = half_bytes[0] << 4;
450            *di = xi as u8;
451            ri += 1;
452        }
453
454        Ok(ri)
455    }
456
457    /// Decodes data encoded by encode_linear.
458    ///
459    /// The output size is guaranteed to be shorter or equal to
460    /// (|size| - 8) * 2.
461    ///
462    /// Note that this method may throw an error if it deems the input data
463    /// to be corrupt, i.e. the last encoded int does not use the last byte
464    /// in the data. In addition the last encoded int need to use either the
465    /// last halfbyte, or the second last followed by a 0x0 halfbyte.
466    ///
467    /// Returns the number of decoded doubles.
468    ///
469    /// * `src`  - Pointer to bytes to be decoded.
470    /// * `size` - Size of byte array.
471    /// * `dst`  - Pointer to resulting double array.
472    pub unsafe fn decode_linear(
473        data: *const u8,
474        data_size: usize,
475        result: *mut f64,
476    ) -> Result<usize> {
477        // safety checks
478        if data_size == 8 {
479            return Ok(0);
480        }
481
482        if data_size < 8 {
483            return Err(ErrorKind::CorruptInputData.into());
484        }
485
486        if data_size < 12 {
487            return Err(ErrorKind::CorruptInputData.into());
488        }
489
490        let scaling = decode_fixed_point(data);
491        let mut ints: [i64; 3] = [0; 3];
492        let mut extrapol: i64;
493        let mut init: i64;
494        ints[1] = 0;
495        for i in 0..4 {
496            init = *data.add(8 + i) as i64;
497            let xi = (0xff & (init)) << (i * 8);
498            ints[1] = ints[1] | xi;
499        }
500        *result = (ints[1] as f64) / scaling;
501
502        if data_size == 12 {
503            return Ok(1);
504        }
505
506        if data_size < 16 {
507            return Err(ErrorKind::CorruptInputData.into());
508        }
509
510        ints[2] = 0;
511        for i in 0..4 {
512            init = *data.add(12 + i) as i64;
513            let xi = (0xff & (init)) << (i * 8);
514            ints[2] = ints[2] | xi;
515        }
516        *result.add(1) = (ints[2] as f64) / scaling;
517
518        let mut half: usize = 0;
519        let mut ri: usize = 2;
520        let mut di: usize = 16;
521        let mut buff: u32 = 0;
522        let mut diff: i32;
523        let mut y: i64;
524
525        while di < data_size {
526            if di == (data_size - 1) && half == 1 {
527                if (*data.add(di) & 0xf) == 0x0 {
528                    break;
529                }
530            }
531
532            ints[0] = ints[1];
533            ints[1] = ints[2];
534            decode_int(data, &mut di, data_size, &mut half, &mut buff)?;
535            diff = buff as i32;
536
537            extrapol = ints[1] + (ints[1] - ints[0]);
538            y = extrapol + diff as i64;
539            *result.add(ri) = (y as f64) / scaling;
540            ri += 1;
541            ints[2] = y;
542        }
543
544        Ok(ri)
545    }
546
547    // OPTIMAL
548
549    /// Macro for maximum value using partial ordering.
550    macro_rules! max {
551        ($d0:ident, $d1:ident) => {
552            if $d0 < $d1 {
553                $d1
554            } else {
555                $d0
556            }
557        };
558    }
559
560    /// Calculate the optimal scaling factor for Numpress compression.
561    pub unsafe fn optimal_linear_scaling(data: *const f64, data_size: usize) -> f64 {
562        match data_size {
563            0 => 0.,
564            // 2147483647.0 == 0x7FFFFFFFl
565            1 => floor(2147483647.0 / *data),
566            _ => {
567                let d0: f64 = *data;
568                let d1: f64 = *data.add(1);
569                let mut max_double: f64 = max!(d0, d1);
570                let mut extrapol: f64;
571                let mut diff: f64;
572
573                for i in 2..data_size {
574                    let di: f64 = *data.add(i);
575                    let di_1: f64 = *data.add(i - 1);
576                    let di_2: f64 = *data.add(i - 2);
577                    extrapol = di_1 + (di_1 - di_2);
578                    diff = di - extrapol;
579                    let maxi = ceil(abs(diff) + 1.0);
580                    max_double = max!(max_double, maxi);
581                }
582                // 2147483647.0 == 0x7FFFFFFFl
583                floor(2147483647.0 / max_double)
584            }
585        }
586    }
587} // low_level
588
589// API
590
591/// Default scaling factor for compression.
592pub const DEFAULT_SCALING: f64 = 10000.0;
593
594/// High-level compressor for Numpress.
595///
596/// The recommended scaling factor is [`DEFAULT_SCALING`], and the optimal scaling
597/// factor can be calculated via [`optimal_scaling`].
598///
599/// * `data`    - Slice of doubles to be encoded.
600/// * `scaling` - Scaling factor used for getting the fixed point representation.
601///
602/// [`DEFAULT_SCALING`]: constant.DEFAULT_SCALING.html
603/// [`optimal_scaling`]: fn.optimal_scaling.html
604pub fn numpress_compress(data: &[f64], scaling: f64) -> Result<Vec<u8>> {
605    let mut vec: Vec<u8> = Vec::with_capacity(data.len() * 5 + 8);
606
607    unsafe {
608        let src: *const f64 = data.as_ptr();
609        let dst: *mut u8 = vec.as_mut_ptr();
610        let length = low_level::encode_linear(src, data.len(), dst, scaling)?;
611        vec.set_len(length);
612        vec.shrink_to_fit();
613    }
614
615    Ok(vec)
616}
617
618/// High-level decompressor for Numpress.
619///
620/// * `data`    - Slice of encoded doubles as bytes.
621pub fn numpress_decompress(data: &[u8]) -> Result<Vec<f64>> {
622    let mut vec: Vec<f64> = Vec::with_capacity((data.len() - 8) * 2);
623
624    unsafe {
625        let src: *const u8 = data.as_ptr();
626        let dst: *mut f64 = vec.as_mut_ptr();
627        let length = low_level::decode_linear(src, data.len(), dst)?;
628        vec.set_len(length);
629        vec.shrink_to_fit();
630    }
631
632    Ok(vec)
633}
634
635
636/// High-level compressor for Numpress linear encoding.
637///
638/// **NOTE**: This compression method is intended for values stored in sorted order like
639/// m/z or retention time.
640///
641/// The recommended scaling factor is [`DEFAULT_SCALING`], and the optimal scaling
642/// factor can be calculated via [`optimal_scaling`].
643///
644/// # Arguments
645/// * `data`: Slice of doubles to be encoded.
646/// * `result`: The buffer to write the encoded bytes to.
647/// * `scaling`: Scaling factor used for getting the fixed point representation,
648///              calculated from [`optimal_scaling`].
649///
650/// # Returns
651/// The number of bytes encoded
652pub fn encode_linear(
653    data: &[f64],
654    result: &mut Vec<u8>,
655    scaling: f64,
656) -> Result<usize> {
657    let required_capacity = data.len() * 5 + 8;
658    let missing_capacity = required_capacity.saturating_sub(result.capacity());
659    if missing_capacity > 0 {
660        result.reserve(missing_capacity);
661    }
662    unsafe {
663        let src: *const f64 = data.as_ptr();
664        let dst: *mut u8 = result.as_mut_ptr();
665        let length = low_level::encode_linear(src, data.len(), dst, scaling)?;
666        result.set_len(length);
667        result.shrink_to_fit();
668        Ok(length)
669    }
670}
671
672/// The decoder for Numpress linear encoding compression, e.g. [`encode_linear`]
673///
674/// # Arguments
675/// * `data`: The encoded byte array
676/// * `result`: The buffer to write the decoded data to
677///
678/// # Returns
679/// The number of values decoded
680pub fn decode_linear(
681    data: &[u8],
682    result: &mut Vec<f64>,
683) -> Result<usize> {
684    let required_capacity = (data.len() - 8) * 2;
685    let missing_capacity = required_capacity.saturating_sub(result.capacity());
686    if missing_capacity > 0 {
687        result.reserve(missing_capacity);
688    }
689
690    unsafe {
691        let src: *const u8 = data.as_ptr();
692        let dst: *mut f64 = result.as_mut_ptr();
693        let length = low_level::decode_linear(src, data.len(), dst)?;
694        result.set_len(length);
695        result.shrink_to_fit();
696        Ok(length)
697    }
698}
699
700
701/// Calculate the optimal, most-compressed scaling factor for linear encoding compression.
702///
703/// # Arguments
704/// * `data`: Slice of doubles to be encoded.
705pub fn optimal_scaling(data: &[f64]) -> f64 {
706    unsafe { low_level::optimal_linear_scaling(data.as_ptr(), data.len()) }
707}
708
709/// Calculate the optimal, most-compressed scaling factor for short logged float (Slof) encoding compression.
710///
711/// # Arguments
712/// * `data`: Slice of floating point values to be encoded.
713#[inline(always)]
714pub fn optimal_slof_fixed_point<T: AsFloat64>(data: &[T]) -> f64 {
715    let max = data.iter().fold(1.0f64, |max, val| {
716        let x = (val.as_() + 1.0).ln();
717        max.max(x)
718    });
719
720    let fp = ((0xFFFF as f64) / max).floor();
721    return fp;
722}
723
724
725/// High-level compressor for Numpress short logged float encoding.
726///
727/// **NOTE**: This compression method is appropriate for intensity and other
728/// floating point values that do not have an ordered pattern to follow.
729///
730/// # Arguments
731/// * `data`: Slice of doubles to be encoded.
732/// * `result`: The buffer to write the encoded bytes to.
733/// * `fixed_point`: Scaling factor used for getting the fixed point representation, calculated with [`optimal_slof_fixed_point`].
734///
735/// # Returns
736/// The number of bytes encoded
737pub fn encode_slof<T: AsFloat64>(
738    data: &[T],
739    result: &mut Vec<u8>,
740    fixed_point: f64,
741) -> Result<usize> {
742    let n_bytes = 8 + data.len() * 2;
743    result.clear();
744    if result.capacity() < n_bytes {
745        result.reserve(n_bytes - result.capacity());
746    }
747
748    unsafe {
749        low_level::encode_fixed_point(fixed_point, result.as_mut_ptr());
750        result.set_len(8);
751    };
752
753    for val in data {
754        let x: u16 = ((val.as_() + 1.0).ln() * fixed_point + 0.5) as u16;
755        result.push((x & 0xFF) as u8);
756        result.push((x >> 8) as u8);
757    }
758
759    Ok(0)
760}
761
762
763/// The decoder for Numpress short logged float encoding compression, e.g. [`encode_slof`]
764///
765/// # Arguments
766/// * `data`: The encoded byte array
767/// * `result`: The buffer to write the decoded data to
768///
769/// # Returns
770/// The number of values decoded
771pub fn decode_slof(data: &[u8], result: &mut Vec<f64>) -> Result<usize> {
772    let data_size = data.len();
773    // safety checks
774    if data_size == 8 {
775        return Ok(0);
776    }
777
778    if data_size < 8 {
779        return Err(ErrorKind::CorruptInputData.into());
780    }
781
782    let scaling = unsafe { low_level::decode_fixed_point(data.as_ptr()) };
783
784    result.reserve(data_size / 2);
785    let mut i = 0;
786    for block in data[8..data_size].chunks(2) {
787        let x = (block[0] as u16) | ((block[1] as u16) << 8);
788        let y = ((x as f64) / scaling).exp() - 1.0;
789        result.push(y);
790        i += 1;
791    }
792
793    Ok(i)
794}
795
796
797/// High-level compressor for Numpress positive integer encoding.
798///
799/// **NOTE**: This compression method is appropriate for intensity and other
800/// floating point values that do not have an ordered pattern to follow. It removes
801/// the non-integral component of the value, which may make this too imprecise for
802/// coordinate data.
803///
804/// # Arguments
805/// * `data`: Slice of doubles to be encoded.
806/// * `result`: The buffer to write the encoded bytes to.
807///
808/// # Returns
809/// The number of bytes encoded
810pub fn encode_pic<T: AsFloat64>(
811    data: &[T],
812    result: &mut Vec<u8>,
813) -> Result<usize> {
814    let mut half_bytes: [u8; 10] = [0; 10];
815    let mut half_byte_count: usize = 0;
816
817    let n = data.len();
818
819    result.clear();
820    let cap = result.capacity();
821    let delta_cap = (n * 2).saturating_sub(cap);
822    if delta_cap > 0 {
823        result.reserve(delta_cap);
824    }
825
826    for val in data {
827        let count: u32 = (val.as_() + 0.5) as u32;
828        unsafe {
829            low_level::encode_int(
830                count,
831                &mut half_bytes[half_byte_count],
832                &mut half_byte_count,
833            )
834        }
835        for hbi in (1..half_byte_count).step_by(2) {
836            let r = (half_bytes[hbi - 1] << 4) | (half_bytes[hbi] & 0xf);
837            result.push(r);
838        }
839        if half_byte_count % 2 != 0 {
840            half_bytes[0] = half_bytes[half_byte_count - 1];
841            half_byte_count = 1;
842        } else {
843            half_byte_count = 0;
844        }
845    }
846    if half_byte_count == 1 {
847        result.push(half_bytes[0] << 4);
848    }
849    Ok(data.len())
850}
851
852
853/// The decoder for Numpress positive integer encoding compression, e.g. [`encode_pic`]
854///
855/// # Arguments
856/// * `data`: The encoded byte array
857/// * `result`: The buffer to write the decoded data to
858///
859/// # Returns
860/// The number of values decoded
861pub fn decode_pic(data: &[u8], result: &mut Vec<f64>) -> Result<usize> {
862    let data_size = data.len();
863
864    let n_bytes = data_size * 5;
865    result.clear();
866    if result.capacity() < n_bytes {
867        result.reserve(n_bytes - result.capacity());
868    }
869
870    let mut di = 0;
871    let mut half = 0;
872    let mut buff: u32 = 0;
873
874    while di < data_size {
875        if di == (data_size - 1) && half == 1 {
876            if (data[di] & 0xf) == 0x0 {
877                break;
878            }
879        }
880
881        unsafe {
882            low_level::decode_int(data.as_ptr(), &mut di, data_size, &mut half, &mut buff)?;
883        }
884
885        result.push(buff as f64);
886    }
887
888    Ok(0)
889}
890
891#[cfg(test)]
892mod tests {
893    use super::*;
894    use sealed::mem;
895
896    use rand::distributions::Uniform;
897    use rand::{thread_rng, Rng};
898
899    #[cfg(feature = "std")]
900    use std::alloc::{alloc, dealloc, Layout};
901
902    // HELPERS
903
904    macro_rules! assert_abs_diff_list_eq {
905        ($a:expr, $b:expr) => {
906            assert_eq!($a.len(), $b.len());
907            let mut iter = $a.iter().zip($b.iter());
908            for (x, y) in iter {
909                assert_abs_diff_eq!(x, y);
910            }
911        };
912        ($a:expr, $b:expr, $eps:expr) => {
913            assert_eq!($a.len(), $b.len());
914            let iter = $a.iter().zip($b.iter());
915            for (x, y) in iter {
916                assert_abs_diff_eq!(x, y, epsilon = $eps);
917            }
918        };
919    }
920
921    // FIXED POINT
922
923    #[test]
924    fn fixed_point_test() {
925        unsafe {
926            let x: f64 = 32.5;
927            let mut xi: [u8; 8] = [0; 8];
928            low_level::encode_fixed_point(x, xi.as_mut_ptr());
929            assert_eq!(low_level::decode_fixed_point(xi.as_ptr()), x);
930
931            let y: f64 = 1.2e-64;
932            let mut yi: [u8; 8] = [0; 8];
933            low_level::encode_fixed_point(y, yi.as_mut_ptr());
934            assert_eq!(low_level::decode_fixed_point(yi.as_ptr()), y);
935        }
936    }
937
938    #[test]
939    #[cfg(feature = "std")]
940    fn fixed_point_heap_test() {
941        unsafe {
942            const SIZE: usize = mem::size_of::<u8>() * 8;
943            const ALIGN: usize = mem::align_of::<u8>();
944            let layout = Layout::from_size_align_unchecked(SIZE, ALIGN);
945
946            let x: f64 = 32.5;
947            let xi: *mut u8 = alloc(layout);
948            low_level::encode_fixed_point(x, xi);
949            assert_eq!(low_level::decode_fixed_point(xi), x);
950
951            dealloc(xi, layout);
952        }
953    }
954
955    // API
956
957    #[test]
958    fn compress_test() {
959        // Check value compression with default scaling.
960        let decoded: Vec<f64> = vec![100., 101., 102., 103.];
961        let encoded: Vec<u8> = vec![
962            64, 195, 136, 0, 0, 0, 0, 0, 64, 66, 15, 0, 80, 105, 15, 0, 136,
963        ];
964        let result = numpress_compress(&decoded, DEFAULT_SCALING).unwrap();
965        assert_eq!(result, encoded);
966
967        // Check value compression with optimal scaling.
968        let encoded: Vec<u8> = vec![
969            65, 116, 70, 248, 96, 0, 0, 0, 88, 144, 187, 126, 222, 255, 255, 127, 136,
970        ];
971        let result = numpress_compress(&decoded, 21262214.0).unwrap();
972        assert_eq!(result, encoded);
973
974        // Bug fix with custom input.
975        let decoded: Vec<f64> = vec![
976            472.36640759869624,
977            8161.255047730418,
978            31419.174861096908,
979            31340.37083086082,
980            11031.961448006856,
981            35019.3535619803,
982            22837.824611949254,
983            2076.226408785704,
984            23277.55357717535,
985            37604.579217858874,
986            34185.89109314591,
987            5077.6548386088325,
988        ];
989        let encoded: Vec<u8> = vec![
990            64, 195, 136, 0, 0, 0, 0, 0, 208, 19, 72, 0, 6, 79, 221, 4, 25, 69, 167, 73, 152, 57,
991            23, 18, 155, 5, 49, 243, 0, 192, 7, 106, 16, 72, 240, 23, 174, 156, 9, 194, 234, 6,
992            200, 3, 9, 25, 137, 1, 126, 185, 240, 131, 198, 89, 96, 97, 11, 0,
993        ];
994        let result = numpress_compress(&decoded, DEFAULT_SCALING).unwrap();
995        assert_eq!(result, encoded);
996    }
997
998    #[test]
999    fn decompress_test() {
1000        // Check value decompression.
1001        let encoded: [u8; 17] = [
1002            64, 195, 136, 0, 0, 0, 0, 0, 64, 66, 15, 0, 80, 105, 15, 0, 136,
1003        ];
1004        let decoded: Vec<f64> = vec![100., 101., 102., 103.];
1005        let result = numpress_decompress(&encoded).unwrap();
1006        assert_eq!(result, decoded);
1007
1008        // Bug fix with custom input.
1009        let encoded: Vec<u8> = vec![
1010            64, 195, 136, 0, 0, 0, 0, 0, 208, 19, 72, 0, 6, 79, 221, 4, 25, 69, 167, 73, 152, 57,
1011            23, 18, 155, 5, 49, 243, 0, 192, 7, 106, 16, 72, 240, 23, 174, 156, 9, 194, 234, 6,
1012            200, 3, 9, 25, 137, 1, 126, 185, 240, 131, 198, 89, 96, 97, 11, 0,
1013        ];
1014        let decoded: Vec<f64> = vec![
1015            472.3664, 8161.255, 31419.1749, 31340.3708, 11031.9614, 35019.3536, 22837.8246,
1016            2076.2264, 23277.5536, 37604.5792, 34185.8911, 5077.6548,
1017        ];
1018        let result = numpress_decompress(&encoded).unwrap();
1019        assert_abs_diff_list_eq!(result, decoded, 0.001);
1020    }
1021
1022    #[test]
1023    fn optimal_scaling_test() {
1024        // Check optimal fixed point detection
1025        let decoded: [f64; 4] = [100., 101., 102., 103.];
1026        assert_eq!(optimal_scaling(&decoded), 21262214.0);
1027    }
1028
1029    #[test]
1030    #[ignore]
1031    fn fuzz_test() {
1032        // fuzz with random integers to ensure minimal loss and no memory corruption
1033        let mut rng = thread_rng();
1034        let dist = Uniform::new(0f64, 55000f64);
1035        for _ in 0..10000 {
1036            let length: usize = rng.gen_range(0, 10000);
1037            let input: Vec<f64> = rng.sample_iter(&dist).take(length).collect();
1038            let encoded = numpress_compress(input.as_slice(), DEFAULT_SCALING).unwrap();
1039            let decoded = numpress_decompress(encoded.as_slice()).unwrap();
1040            assert_abs_diff_list_eq!(input, decoded, 0.0001);
1041        }
1042    }
1043
1044    #[test]
1045    fn test_slof() {
1046        let dat: Vec<f64> = vec![
1047            472.36640759869624,
1048            8161.255047730418,
1049            31419.174861096908,
1050            31340.37083086082,
1051            11031.961448006856,
1052            35019.3535619803,
1053            22837.824611949254,
1054            2076.226408785704,
1055            23277.55357717535,
1056            37604.579217858874,
1057            34185.89109314591,
1058            5077.6548386088325,
1059        ];
1060
1061        let fp = super::optimal_slof_fixed_point(&dat);
1062
1063        assert_eq!(fp, 6220.0);
1064
1065        let mut buf = Vec::new();
1066
1067        let expected: [u8; 32] = [
1068            64, 184, 76, 0, 0, 0, 0, 0, 170, 149, 217, 218, 153, 251, 138, 251, 44, 226, 60, 254,
1069            217, 243, 153, 185, 80, 244, 247, 255, 166, 253, 82, 207,
1070        ];
1071
1072        super::encode_slof(&dat, &mut buf, fp).unwrap();
1073
1074        assert_eq!(expected.as_slice(), buf);
1075
1076        let mut decoded = Vec::new();
1077
1078        super::decode_slof(&buf, &mut decoded).unwrap();
1079
1080        let expected: [f64; 12] = [
1081            472.33674704,
1082            8160.92010031,
1083            31417.26512109,
1084            31341.58888686,
1085            11032.39290609,
1086            35018.68443674,
1087            22836.8296104,
1088            2076.13741228,
1089            23277.96556204,
1090            37603.81801279,
1091            34184.26011057,
1092            5077.6330903,
1093        ];
1094
1095        let errs: [f64; 12] = [
1096            -0.02966056,
1097            -0.33494742,
1098            -1.90974001,
1099            1.218056,
1100            0.43145808,
1101            -0.66912524,
1102            -0.99500155,
1103            -0.0889965,
1104            0.41198487,
1105            -0.76120506,
1106            -1.63098258,
1107            -0.02174831,
1108        ];
1109
1110        for ((a, b), e) in expected.iter().zip(decoded.iter()).zip(errs) {
1111            let c = *a - *b;
1112            let d = c.abs() - e.abs();
1113            assert!(d < 1e-3, "delta {a} - {b} = {c} is too far from {e} ({d})")
1114        }
1115    }
1116
1117    #[test]
1118    fn test_pic() {
1119        let dat = [
1120            472.36640759869624,
1121            8161.255047730418,
1122            31419.174861096908,
1123            31340.37083086082,
1124            11031.961448006856,
1125            35019.3535619803,
1126            22837.824611949254,
1127            2076.226408785704,
1128            23277.55357717535,
1129            37604.579217858874,
1130            34185.89109314591,
1131            5077.6548386088325,
1132        ];
1133
1134        let mut buf = Vec::new();
1135        encode_pic(&dat, &mut buf).unwrap();
1136
1137        let expected: [u8; 29] = [
1138            88, 209, 65, 239, 20, 187, 167, 76, 106, 116, 129, 178, 75, 200, 132, 99, 149, 92, 24,
1139            78, 234, 84, 94, 41, 74, 133, 132, 109, 49,
1140        ];
1141
1142        assert_eq!(expected.as_slice(), buf);
1143
1144        let mut decoded = Vec::new();
1145        decode_pic(&buf, &mut decoded).unwrap();
1146
1147        let expected = [
1148            472., 8161., 31419., 31340., 11032., 35019., 22838., 2076., 23278., 37605., 34186.,
1149            5078.,
1150        ];
1151
1152        for (a, b) in expected.iter().zip(decoded.iter()) {
1153            let c = *a - *b;
1154            let d = c.abs();
1155            assert!(d < 1.0, "delta {a} - {b} = {c} is too far from 1 ({d})")
1156        }
1157    }
1158}