vortex_array/array/sparse/
flatten.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
use itertools::Itertools;
use vortex_dtype::{match_each_native_ptype, DType, NativePType};
use vortex_error::{VortexError, VortexResult};
use vortex_scalar::ScalarValue;

use crate::array::primitive::PrimitiveArray;
use crate::array::sparse::SparseArray;
use crate::array::BoolArray;
use crate::validity::Validity;
use crate::{ArrayDType, Canonical, IntoArrayVariant, IntoCanonical};

impl IntoCanonical for SparseArray {
    /// Expands this sparse array into its dense canonical form.
    ///
    /// Boolean arrays are handed to `canonicalize_sparse_bools`. Every other dtype has its
    /// stored values converted with `into_primitive` and is dispatched, per native ptype, to
    /// `canonicalize_sparse_primitives`.
    ///
    /// # Errors
    ///
    /// Propagates any error from converting the stored values (`into_bool` /
    /// `into_primitive`) or from the canonicalization helpers.
    fn into_canonical(self) -> VortexResult<Canonical> {
        // Positions of the stored values, resolved to `usize` with the offset applied.
        let positions = self.resolved_indices();

        let len = self.len();
        // Bitmap sized for `len` entries, built from a null buffer; the helpers set the bits
        // of the positions they patch.
        let validity_builder =
            BooleanBufferBuilder::new_from_buffer(MutableBuffer::new_null(len), len);
        let fill = self.fill_value();

        match self.dtype() {
            DType::Bool(_) => {
                let bools = self.values().into_bool()?.boolean_buffer();
                canonicalize_sparse_bools(bools, &positions, len, fill, validity_builder)
            }
            _ => {
                let primitives = self.values().into_primitive()?;
                match_each_native_ptype!(primitives.ptype(), |$P| {
                    canonicalize_sparse_primitives(
                        primitives.maybe_null_slice::<$P>(),
                        &positions,
                        len,
                        fill,
                        validity_builder
                    )
                })
            }
        }
    }
}

/// Densifies sparse boolean data into a canonical [`BoolArray`] of length `len`.
///
/// Every slot starts out as the fill value (`false` when the fill is null); the slots named by
/// `indices` are then overwritten with the corresponding entries of `values`, and the matching
/// bits of `validity_buffer` are set.
///
/// The patch bitmap is only used as the result's validity when the fill value is null, in which
/// case unpatched slots are null. With a non-null fill every slot holds a real value, so the
/// result is built as `Validity::NonNullable`, the same rule `canonicalize_sparse_primitives`
/// applies. Previously the bitmap was used unconditionally, which marked every filled slot as
/// null even though it carried the fill value.
///
/// # Errors
///
/// Returns an error if a non-null `fill_value` cannot be converted to `bool`.
///
/// # Panics
///
/// Panics if an entry of `indices` is not smaller than `len`.
fn canonicalize_sparse_bools(
    values: BooleanBuffer,
    indices: &[usize],
    len: usize,
    fill_value: &ScalarValue,
    mut validity_buffer: BooleanBufferBuilder,
) -> VortexResult<Canonical> {
    let fill_is_null = fill_value.is_null();
    let fill_bool: bool = if fill_is_null {
        bool::default()
    } else {
        fill_value.try_into()?
    };

    let mut flat_bools = vec![fill_bool; len];
    for (i, &idx) in indices.iter().enumerate() {
        flat_bools[idx] = values.value(i);
        validity_buffer.set_bit(idx, true);
    }

    // NOTE(review): like the primitive path, a non-null fill yields a non-nullable result
    // regardless of the sparse array's declared nullability — confirm this is acceptable for
    // nullable dtypes with a non-null fill.
    let validity = if fill_is_null {
        Validity::from(validity_buffer.finish())
    } else {
        Validity::NonNullable
    };

    Ok(Canonical::Bool(BoolArray::from_vec(flat_bools, validity)))
}

/// Densifies sparse primitive data into a canonical [`PrimitiveArray`] of length `len`.
///
/// The output is first filled with the fill value (`T::default()` when the fill is null), then
/// each entry of `values` is written at its paired position from `indices`, and the matching
/// bit of `validity` is set.
///
/// When the fill value is null the resulting bitmap becomes the array's validity; otherwise
/// the array is built directly from the dense vector.
///
/// # Errors
///
/// Returns an error if a non-null `fill_value` cannot be converted to `T`.
///
/// # Panics
///
/// Panics if `values` and `indices` differ in length (`zip_eq`) or if an index is not smaller
/// than `len`.
fn canonicalize_sparse_primitives<
    T: NativePType + for<'a> TryFrom<&'a ScalarValue, Error = VortexError>,
>(
    values: &[T],
    indices: &[usize],
    len: usize,
    fill_value: &ScalarValue,
    mut validity: BooleanBufferBuilder,
) -> VortexResult<Canonical> {
    let fill_is_null = fill_value.is_null();
    let fill: T = if fill_is_null {
        T::default()
    } else {
        fill_value.try_into()?
    };

    let mut dense = vec![fill; len];
    values
        .iter()
        .zip_eq(indices)
        .for_each(|(&value, &position)| {
            dense[position] = value;
            validity.set_bit(position, true);
        });

    let patched_mask = validity.finish();
    if fill_is_null {
        // Unpatched slots are null: expose the patch bitmap as validity.
        return Ok(Canonical::Primitive(PrimitiveArray::from_vec(
            dense,
            Validity::from(patched_mask),
        )));
    }

    Ok(Canonical::Primitive(PrimitiveArray::from(dense)))
}

#[cfg(test)]
mod test {
    use vortex_dtype::{DType, Nullability};

    use crate::array::sparse::SparseArray;
    use crate::array::BoolArray;
    use crate::validity::Validity;
    use crate::{ArrayDType, Canonical, IntoArray, IntoCanonical};

    /// A non-nullable sparse bool array with a `true` fill canonicalizes to `Canonical::Bool`.
    #[test]
    fn test_sparse_bool() {
        let positions = vec![0u64].into_array();
        let patches = BoolArray::from_vec(vec![true], Validity::NonNullable).into_array();
        let sparse = SparseArray::try_new(positions, patches, 10, true.into()).unwrap();

        let expected_dtype = DType::Bool(Nullability::NonNullable);
        assert_eq!(sparse.dtype(), &expected_dtype);

        let canonical = sparse.into_canonical().unwrap();
        assert!(matches!(canonical, Canonical::Bool(_)));
    }
}