vortex_array/arrays/decimal/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod compute;
5mod ops;
6mod patch;
7mod serde;
8
9use arrow_buffer::BooleanBufferBuilder;
10use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
11use vortex_dtype::{DType, DecimalDType};
12use vortex_error::{VortexResult, vortex_panic};
13use vortex_scalar::{DecimalValueType, NativeDecimalType};
14
15use crate::builders::ArrayBuilder;
16use crate::stats::{ArrayStats, StatsSetRef};
17use crate::validity::Validity;
18use crate::vtable::{
19    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
20    ValidityVTableFromValidityHelper, VisitorVTable,
21};
22use crate::{ArrayBufferVisitor, ArrayChildVisitor, Canonical, EncodingId, EncodingRef, vtable};
23
24vtable!(Decimal);
25
26impl VTable for DecimalVTable {
27    type Array = DecimalArray;
28    type Encoding = DecimalEncoding;
29
30    type ArrayVTable = Self;
31    type CanonicalVTable = Self;
32    type OperationsVTable = Self;
33    type ValidityVTable = ValidityVTableFromValidityHelper;
34    type VisitorVTable = Self;
35    type ComputeVTable = NotSupported;
36    type EncodeVTable = NotSupported;
37    type SerdeVTable = Self;
38
39    fn id(_encoding: &Self::Encoding) -> EncodingId {
40        EncodingId::new_ref("vortex.decimal")
41    }
42
43    fn encoding(_array: &Self::Array) -> EncodingRef {
44        EncodingRef::new_ref(DecimalEncoding.as_ref())
45    }
46}
47
/// Field-less marker type naming the decimal encoding (`"vortex.decimal"`).
#[derive(Debug, Clone)]
pub struct DecimalEncoding;
50
51/// Maps a decimal precision into the smallest type that can represent it.
52pub fn smallest_storage_type(decimal_dtype: &DecimalDType) -> DecimalValueType {
53    match decimal_dtype.precision() {
54        1..=2 => DecimalValueType::I8,
55        3..=4 => DecimalValueType::I16,
56        5..=9 => DecimalValueType::I32,
57        10..=18 => DecimalValueType::I64,
58        19..=38 => DecimalValueType::I128,
59        39..=76 => DecimalValueType::I256,
60        0 => unreachable!("precision must be greater than 0"),
61        p => unreachable!("precision larger than 76 is invalid found precision {p}"),
62    }
63}
64
65/// True if `value_type` can represent every value of the type `dtype`.
66pub fn compatible_storage_type(value_type: DecimalValueType, dtype: DecimalDType) -> bool {
67    value_type >= smallest_storage_type(&dtype)
68}
69
70/// Array for decimal-typed real numbers
71#[derive(Clone, Debug)]
72pub struct DecimalArray {
73    dtype: DType,
74    values: ByteBuffer,
75    values_type: DecimalValueType,
76    validity: Validity,
77    stats_set: ArrayStats,
78}
79
80impl DecimalArray {
81    /// Creates a new [`DecimalArray`] from a [`Buffer`] and [`Validity`], without checking
82    /// any invariants.
83    ///
84    /// # Panics
85    ///
86    /// Panics if the validity length is not compatible with the buffer length.
87    pub fn new<T: NativeDecimalType>(
88        buffer: Buffer<T>,
89        decimal_dtype: DecimalDType,
90        validity: Validity,
91    ) -> Self {
92        if let Some(len) = validity.maybe_len() {
93            if buffer.len() != len {
94                vortex_panic!(
95                    "Buffer and validity length mismatch: buffer={}, validity={}",
96                    buffer.len(),
97                    len,
98                );
99            }
100        }
101
102        Self {
103            dtype: DType::Decimal(decimal_dtype, validity.nullability()),
104            values: buffer.into_byte_buffer(),
105            values_type: T::VALUES_TYPE,
106            validity,
107            stats_set: ArrayStats::default(),
108        }
109    }
110
111    /// Returns the underlying [`ByteBuffer`] of the array.
112    pub fn byte_buffer(&self) -> ByteBuffer {
113        self.values.clone()
114    }
115
116    pub fn buffer<T: NativeDecimalType>(&self) -> Buffer<T> {
117        if self.values_type != T::VALUES_TYPE {
118            vortex_panic!(
119                "Cannot extract Buffer<{:?}> for DecimalArray with values_type {:?}",
120                T::VALUES_TYPE,
121                self.values_type,
122            );
123        }
124        Buffer::<T>::from_byte_buffer(self.values.clone())
125    }
126
127    /// Returns the decimal type information
128    pub fn decimal_dtype(&self) -> DecimalDType {
129        match &self.dtype {
130            DType::Decimal(decimal_dtype, _) => *decimal_dtype,
131            _ => vortex_panic!("Expected Decimal dtype, got {:?}", self.dtype),
132        }
133    }
134
135    pub fn values_type(&self) -> DecimalValueType {
136        self.values_type
137    }
138
139    pub fn precision(&self) -> u8 {
140        self.decimal_dtype().precision()
141    }
142
143    pub fn scale(&self) -> i8 {
144        self.decimal_dtype().scale()
145    }
146
147    pub fn from_option_iter<T: NativeDecimalType, I: IntoIterator<Item = Option<T>>>(
148        iter: I,
149        decimal_dtype: DecimalDType,
150    ) -> Self {
151        let iter = iter.into_iter();
152        let mut values = BufferMut::with_capacity(iter.size_hint().0);
153        let mut validity = BooleanBufferBuilder::new(values.capacity());
154
155        for i in iter {
156            match i {
157                None => {
158                    validity.append(false);
159                    values.push(T::default());
160                }
161                Some(e) => {
162                    validity.append(true);
163                    values.push(e);
164                }
165            }
166        }
167        Self::new(
168            values.freeze(),
169            decimal_dtype,
170            Validity::from(validity.finish()),
171        )
172    }
173}
174
175impl ArrayVTable<DecimalVTable> for DecimalVTable {
176    fn len(array: &DecimalArray) -> usize {
177        let divisor = match array.values_type {
178            DecimalValueType::I8 => 1,
179            DecimalValueType::I16 => 2,
180            DecimalValueType::I32 => 4,
181            DecimalValueType::I64 => 8,
182            DecimalValueType::I128 => 16,
183            DecimalValueType::I256 => 32,
184            ty => vortex_panic!("unknown decimal value type {:?}", ty),
185        };
186        array.values.len() / divisor
187    }
188
189    fn dtype(array: &DecimalArray) -> &DType {
190        &array.dtype
191    }
192
193    fn stats(array: &DecimalArray) -> StatsSetRef<'_> {
194        array.stats_set.to_ref(array.as_ref())
195    }
196}
197
198impl VisitorVTable<DecimalVTable> for DecimalVTable {
199    fn visit_buffers(array: &DecimalArray, visitor: &mut dyn ArrayBufferVisitor) {
200        visitor.visit_buffer(&array.values);
201    }
202
203    fn visit_children(array: &DecimalArray, visitor: &mut dyn ArrayChildVisitor) {
204        visitor.visit_validity(array.validity(), array.len())
205    }
206}
207
208impl CanonicalVTable<DecimalVTable> for DecimalVTable {
209    fn canonicalize(array: &DecimalArray) -> VortexResult<Canonical> {
210        Ok(Canonical::Decimal(array.clone()))
211    }
212
213    fn append_to_builder(array: &DecimalArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
214        builder.extend_from_array(array.as_ref())
215    }
216}
217
218impl ValidityHelper for DecimalArray {
219    fn validity(&self) -> &Validity {
220        &self.validity
221    }
222}
223
#[cfg(test)]
mod test {
    use arrow_array::Decimal128Array;

    /// Checks that reading a slot of an all-null Arrow `Decimal128Array` yields `0`.
    ///
    /// NOTE(review): this exercises only Arrow's array type, not `DecimalArray`; the earlier
    /// comment here suggested the point is that the decimal type information travels with the
    /// array itself - confirm the intent.
    #[test]
    fn test_decimal() {
        let all_null = Decimal128Array::new_null(100);
        assert_eq!(all_null.value(10), 0i128);
    }
}