vortex_zigzag/
compress.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
use vortex::array::PrimitiveArray;
use vortex::validity::Validity;
use vortex::IntoArray;
use vortex_dtype::{NativePType, PType};
use vortex_error::{vortex_bail, VortexResult};
use zigzag::ZigZag as ExternalZigZag;

use crate::ZigZagArray;

pub fn zigzag_encode(parray: PrimitiveArray) -> VortexResult<ZigZagArray> {
    let validity = parray.validity();
    let encoded = match parray.ptype() {
        PType::I8 => zigzag_encode_primitive::<i8>(parray.into_maybe_null_slice(), validity),
        PType::I16 => zigzag_encode_primitive::<i16>(parray.into_maybe_null_slice(), validity),
        PType::I32 => zigzag_encode_primitive::<i32>(parray.into_maybe_null_slice(), validity),
        PType::I64 => zigzag_encode_primitive::<i64>(parray.into_maybe_null_slice(), validity),
        _ => vortex_bail!(
            "ZigZag can only encode signed integers, got {}",
            parray.ptype()
        ),
    };
    ZigZagArray::try_new(encoded.into_array())
}

fn zigzag_encode_primitive<T: ExternalZigZag + NativePType>(
    values: Vec<T>,
    validity: Validity,
) -> PrimitiveArray
where
    <T as ExternalZigZag>::UInt: NativePType,
{
    PrimitiveArray::from_vec(values.into_iter().map(|v| T::encode(v)).collect(), validity)
}

pub fn zigzag_decode(parray: PrimitiveArray) -> VortexResult<PrimitiveArray> {
    let validity = parray.validity();
    Ok(match parray.ptype() {
        PType::U8 => zigzag_decode_primitive::<i8>(parray.into_maybe_null_slice(), validity),
        PType::U16 => zigzag_decode_primitive::<i16>(parray.into_maybe_null_slice(), validity),
        PType::U32 => zigzag_decode_primitive::<i32>(parray.into_maybe_null_slice(), validity),
        PType::U64 => zigzag_decode_primitive::<i64>(parray.into_maybe_null_slice(), validity),
        _ => vortex_bail!(
            "ZigZag can only decode unsigned integers, got {}",
            parray.ptype()
        ),
    })
}

fn zigzag_decode_primitive<T: ExternalZigZag + NativePType>(
    values: Vec<T::UInt>,
    validity: Validity,
) -> PrimitiveArray
where
    <T as ExternalZigZag>::UInt: NativePType,
{
    PrimitiveArray::from_vec(values.into_iter().map(|v| T::decode(v)).collect(), validity)
}

#[cfg(test)]
mod test {
    use vortex::encoding::ArrayEncoding;

    use super::*;
    use crate::ZigZagEncoding;

    #[test]
    fn test_compress() {
        let compressed = zigzag_encode(PrimitiveArray::from(Vec::from_iter(
            (-10_000..10_000).map(|i| i as i64),
        )))
        .unwrap();
        assert_eq!(compressed.as_ref().encoding().id(), ZigZagEncoding.id());
    }
}