vortex_array/arrays/decimal/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod compute;
5mod ops;
6mod patch;
7mod serde;
8
9use arrow_buffer::BooleanBufferBuilder;
10use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
11use vortex_dtype::{DType, DecimalDType};
12use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
13use vortex_scalar::{DecimalValueType, NativeDecimalType};
14
15use crate::builders::ArrayBuilder;
16use crate::stats::{ArrayStats, StatsSetRef};
17use crate::validity::Validity;
18use crate::vtable::{
19    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
20    ValidityVTableFromValidityHelper, VisitorVTable,
21};
22use crate::{ArrayBufferVisitor, ArrayChildVisitor, Canonical, EncodingId, EncodingRef, vtable};
23
24vtable!(Decimal);
25
26impl VTable for DecimalVTable {
27    type Array = DecimalArray;
28    type Encoding = DecimalEncoding;
29
30    type ArrayVTable = Self;
31    type CanonicalVTable = Self;
32    type OperationsVTable = Self;
33    type ValidityVTable = ValidityVTableFromValidityHelper;
34    type VisitorVTable = Self;
35    type ComputeVTable = NotSupported;
36    type EncodeVTable = NotSupported;
37    type PipelineVTable = NotSupported;
38    type SerdeVTable = Self;
39
40    fn id(_encoding: &Self::Encoding) -> EncodingId {
41        EncodingId::new_ref("vortex.decimal")
42    }
43
44    fn encoding(_array: &Self::Array) -> EncodingRef {
45        EncodingRef::new_ref(DecimalEncoding.as_ref())
46    }
47}
48
/// Zero-sized marker type naming the decimal encoding.
#[derive(Debug, Clone)]
pub struct DecimalEncoding;
51
52/// Maps a decimal precision into the smallest type that can represent it.
53pub fn smallest_storage_type(decimal_dtype: &DecimalDType) -> DecimalValueType {
54    match decimal_dtype.precision() {
55        1..=2 => DecimalValueType::I8,
56        3..=4 => DecimalValueType::I16,
57        5..=9 => DecimalValueType::I32,
58        10..=18 => DecimalValueType::I64,
59        19..=38 => DecimalValueType::I128,
60        39..=76 => DecimalValueType::I256,
61        0 => unreachable!("precision must be greater than 0"),
62        p => unreachable!("precision larger than 76 is invalid found precision {p}"),
63    }
64}
65
66/// True if `value_type` can represent every value of the type `dtype`.
67pub fn compatible_storage_type(value_type: DecimalValueType, dtype: DecimalDType) -> bool {
68    value_type >= smallest_storage_type(&dtype)
69}
70
71/// A decimal array that stores fixed-precision decimal numbers with configurable scale.
72///
73/// This mirrors the Apache Arrow Decimal encoding and provides exact arithmetic for
74/// financial and scientific computations where floating-point precision loss is unacceptable.
75///
76/// ## Storage Format
77///
78/// Decimals are stored as scaled integers in a supported scalar value type.
79///
80/// The precisions supported for each scalar type are:
81/// - **i8**: precision 1-2 digits
82/// - **i16**: precision 3-4 digits
83/// - **i32**: precision 5-9 digits
84/// - **i64**: precision 10-18 digits
85/// - **i128**: precision 19-38 digits
86/// - **i256**: precision 39-76 digits
87///
88/// These are just the maximal ranges for each scalar type, but it is perfectly legal to store
89/// values with precision that does not match this exactly. For example, a valid DecimalArray with
90/// precision=39 may store its values in an `i8` if all of the actual values fit into it.
91///
92/// Similarly, a `DecimalArray` can be built that stores a set of precision=2 values in a
93/// `Buffer<i256>`.
94///
95/// ## Precision and Scale
96///
97/// - **Precision**: Total number of significant digits (1-76, u8 range)
98/// - **Scale**: Number of digits after the decimal point (-128 to 127, i8 range)
99/// - **Value**: `stored_integer / 10^scale`
100///
101/// For example, with precision=5 and scale=2:
102/// - Stored value 12345 represents 123.45
103/// - Range: -999.99 to 999.99
104///
105/// ## Valid Scalar Types
106///
107/// The underlying storage uses these native types based on precision:
108/// - `DecimalValueType::I8`, `I16`, `I32`, `I64`, `I128`, `I256`
109/// - Type selection is automatic based on the required precision
110///
111/// # Examples
112///
113/// ```
114/// use vortex_array::arrays::DecimalArray;
115/// use vortex_dtype::DecimalDType;
116/// use vortex_buffer::{buffer, Buffer};
117/// use vortex_array::validity::Validity;
118///
119/// // Create a decimal array with precision=5, scale=2 (e.g., 123.45)
120/// let decimal_dtype = DecimalDType::new(5, 2);
121/// let values = buffer![12345i32, 67890i32, -12300i32]; // 123.45, 678.90, -123.00
122/// let array = DecimalArray::new(values, decimal_dtype, Validity::NonNullable);
123///
124/// assert_eq!(array.precision(), 5);
125/// assert_eq!(array.scale(), 2);
126/// assert_eq!(array.len(), 3);
127/// ```
128#[derive(Clone, Debug)]
129pub struct DecimalArray {
130    dtype: DType,
131    values: ByteBuffer,
132    values_type: DecimalValueType,
133    validity: Validity,
134    stats_set: ArrayStats,
135}
136
137impl DecimalArray {
138    fn validate<T: NativeDecimalType>(buffer: &Buffer<T>, validity: &Validity) -> VortexResult<()> {
139        if let Some(len) = validity.maybe_len() {
140            vortex_ensure!(
141                buffer.len() == len,
142                "Buffer and validity length mismatch: buffer={}, validity={}",
143                buffer.len(),
144                len,
145            );
146        }
147
148        Ok(())
149    }
150}
151
152impl DecimalArray {
153    /// Creates a new [`DecimalArray`] from a [`Buffer`] and [`Validity`], without checking
154    /// any invariants.
155    ///
156    /// # Panics
157    ///
158    /// Panics if the provided buffer and validity differ in length.
159    ///
160    /// See also [`DecimalArray::try_new`].
161    pub fn new<T: NativeDecimalType>(
162        buffer: Buffer<T>,
163        decimal_dtype: DecimalDType,
164        validity: Validity,
165    ) -> Self {
166        Self::try_new(buffer, decimal_dtype, validity).vortex_expect("DecimalArray new")
167    }
168
169    /// Build a new `DecimalArray` from a component `buffer`, decimal_dtype` and `validity`.
170    ///
171    /// This constructor validates the length of the buffer and validity are equal, returning
172    /// an error otherwise.
173    ///
174    /// See [`DecimalArray::new`] for an infallible constructor that panics on validation errors.
175    pub fn try_new<T: NativeDecimalType>(
176        buffer: Buffer<T>,
177        decimal_dtype: DecimalDType,
178        validity: Validity,
179    ) -> VortexResult<Self> {
180        Self::validate(&buffer, &validity)?;
181
182        Ok(Self {
183            values: buffer.into_byte_buffer(),
184            values_type: T::VALUES_TYPE,
185            dtype: DType::Decimal(decimal_dtype, validity.nullability()),
186            validity,
187            stats_set: Default::default(),
188        })
189    }
190
191    /// Returns the underlying [`ByteBuffer`] of the array.
192    pub fn byte_buffer(&self) -> ByteBuffer {
193        self.values.clone()
194    }
195
196    pub fn buffer<T: NativeDecimalType>(&self) -> Buffer<T> {
197        if self.values_type != T::VALUES_TYPE {
198            vortex_panic!(
199                "Cannot extract Buffer<{:?}> for DecimalArray with values_type {:?}",
200                T::VALUES_TYPE,
201                self.values_type,
202            );
203        }
204        Buffer::<T>::from_byte_buffer(self.values.clone())
205    }
206
207    /// Returns the decimal type information
208    pub fn decimal_dtype(&self) -> DecimalDType {
209        if let DType::Decimal(decimal_dtype, _) = self.dtype {
210            decimal_dtype
211        } else {
212            vortex_panic!("Expected Decimal dtype, got {:?}", self.dtype)
213        }
214    }
215
216    pub fn values_type(&self) -> DecimalValueType {
217        self.values_type
218    }
219
220    pub fn precision(&self) -> u8 {
221        self.decimal_dtype().precision()
222    }
223
224    pub fn scale(&self) -> i8 {
225        self.decimal_dtype().scale()
226    }
227
228    pub fn from_option_iter<T: NativeDecimalType, I: IntoIterator<Item = Option<T>>>(
229        iter: I,
230        decimal_dtype: DecimalDType,
231    ) -> Self {
232        let iter = iter.into_iter();
233        let mut values = BufferMut::with_capacity(iter.size_hint().0);
234        let mut validity = BooleanBufferBuilder::new(values.capacity());
235
236        for i in iter {
237            match i {
238                None => {
239                    validity.append(false);
240                    values.push(T::default());
241                }
242                Some(e) => {
243                    validity.append(true);
244                    values.push(e);
245                }
246            }
247        }
248        Self::new(
249            values.freeze(),
250            decimal_dtype,
251            Validity::from(validity.finish()),
252        )
253    }
254}
255
256impl ArrayVTable<DecimalVTable> for DecimalVTable {
257    fn len(array: &DecimalArray) -> usize {
258        let divisor = match array.values_type {
259            DecimalValueType::I8 => 1,
260            DecimalValueType::I16 => 2,
261            DecimalValueType::I32 => 4,
262            DecimalValueType::I64 => 8,
263            DecimalValueType::I128 => 16,
264            DecimalValueType::I256 => 32,
265            ty => vortex_panic!("unknown decimal value type {:?}", ty),
266        };
267        array.values.len() / divisor
268    }
269
270    fn dtype(array: &DecimalArray) -> &DType {
271        &array.dtype
272    }
273
274    fn stats(array: &DecimalArray) -> StatsSetRef<'_> {
275        array.stats_set.to_ref(array.as_ref())
276    }
277}
278
279impl VisitorVTable<DecimalVTable> for DecimalVTable {
280    fn visit_buffers(array: &DecimalArray, visitor: &mut dyn ArrayBufferVisitor) {
281        visitor.visit_buffer(&array.values);
282    }
283
284    fn visit_children(array: &DecimalArray, visitor: &mut dyn ArrayChildVisitor) {
285        visitor.visit_validity(array.validity(), array.len())
286    }
287}
288
289impl CanonicalVTable<DecimalVTable> for DecimalVTable {
290    fn canonicalize(array: &DecimalArray) -> VortexResult<Canonical> {
291        Ok(Canonical::Decimal(array.clone()))
292    }
293
294    fn append_to_builder(array: &DecimalArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
295        builder.extend_from_array(array.as_ref())
296    }
297}
298
299impl ValidityHelper for DecimalArray {
300    fn validity(&self) -> &Validity {
301        &self.validity
302    }
303}
304
#[cfg(test)]
mod test {
    use arrow_array::Decimal128Array;

    /// Checks that reading the raw value behind a null slot of an all-null Arrow
    /// `Decimal128Array` yields zero.
    #[test]
    fn test_decimal() {
        // NOTE(review): the earlier note here said the dtype information can only travel
        // with the array itself; how that relates to this assertion is unclear — confirm.
        let all_null = Decimal128Array::new_null(100);
        assert_eq!(all_null.value(10), 0i128);
    }
}
317}