vortex_array/arrays/decimal/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod compute;
5mod ops;
6mod patch;
7mod serde;
8
9use arrow_buffer::BooleanBufferBuilder;
10use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
11use vortex_dtype::{DType, DecimalDType};
12use vortex_error::{VortexResult, vortex_panic};
13use vortex_scalar::{DecimalValueType, NativeDecimalType};
14
15use crate::builders::ArrayBuilder;
16use crate::stats::{ArrayStats, StatsSetRef};
17use crate::validity::Validity;
18use crate::vtable::{
19    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
20    ValidityVTableFromValidityHelper, VisitorVTable,
21};
22use crate::{ArrayBufferVisitor, ArrayChildVisitor, Canonical, EncodingId, EncodingRef, vtable};
23
24vtable!(Decimal);
25
26impl VTable for DecimalVTable {
27    type Array = DecimalArray;
28    type Encoding = DecimalEncoding;
29
30    type ArrayVTable = Self;
31    type CanonicalVTable = Self;
32    type OperationsVTable = Self;
33    type ValidityVTable = ValidityVTableFromValidityHelper;
34    type VisitorVTable = Self;
35    type ComputeVTable = NotSupported;
36    type EncodeVTable = NotSupported;
37    type SerdeVTable = Self;
38
39    fn id(_encoding: &Self::Encoding) -> EncodingId {
40        EncodingId::new_ref("vortex.decimal")
41    }
42
43    fn encoding(_array: &Self::Array) -> EncodingRef {
44        EncodingRef::new_ref(DecimalEncoding.as_ref())
45    }
46}
47
/// Zero-sized marker type naming the decimal encoding (`"vortex.decimal"`).
#[derive(Debug, Clone)]
pub struct DecimalEncoding;
50
51/// Maps a decimal precision into the smallest type that can represent it.
52pub fn smallest_storage_type(decimal_dtype: &DecimalDType) -> DecimalValueType {
53    match decimal_dtype.precision() {
54        1..=2 => DecimalValueType::I8,
55        3..=4 => DecimalValueType::I16,
56        5..=9 => DecimalValueType::I32,
57        10..=18 => DecimalValueType::I64,
58        19..=38 => DecimalValueType::I128,
59        39..=76 => DecimalValueType::I256,
60        0 => unreachable!("precision must be greater than 0"),
61        p => unreachable!("precision larger than 76 is invalid found precision {p}"),
62    }
63}
64
65/// True if `value_type` can represent every value of the type `dtype`.
66pub fn compatible_storage_type(value_type: DecimalValueType, dtype: DecimalDType) -> bool {
67    value_type >= smallest_storage_type(&dtype)
68}
69
70/// A decimal array that stores fixed-precision decimal numbers with configurable scale.
71///
72/// This mirrors the Apache Arrow Decimal encoding and provides exact arithmetic for
73/// financial and scientific computations where floating-point precision loss is unacceptable.
74///
75/// ## Storage Format
76///
77/// Decimals are stored as scaled integers in a supported scalar value type.
78///
79/// The precisions supported for each scalar type are:
80/// - **i8**: precision 1-2 digits
81/// - **i16**: precision 3-4 digits  
82/// - **i32**: precision 5-9 digits
83/// - **i64**: precision 10-18 digits
84/// - **i128**: precision 19-38 digits
85/// - **i256**: precision 39-76 digits
86///
87/// These are just the maximal ranges for each scalar type, but it is perfectly legal to store
88/// values with precision that does not match this exactly. For example, a valid DecimalArray with
89/// precision=39 may store its values in an `i8` if all of the actual values fit into it.
90///
91/// Similarly, a `DecimalArray` can be built that stores a set of precision=2 values in a
92/// `Buffer<i256>`.
93///
94/// ## Precision and Scale
95///
96/// - **Precision**: Total number of significant digits (1-76, u8 range)
97/// - **Scale**: Number of digits after the decimal point (-128 to 127, i8 range)
98/// - **Value**: `stored_integer / 10^scale`
99///
100/// For example, with precision=5 and scale=2:
101/// - Stored value 12345 represents 123.45
102/// - Range: -999.99 to 999.99
103///
104/// ## Valid Scalar Types
105///
106/// The underlying storage uses these native types based on precision:
107/// - `DecimalValueType::I8`, `I16`, `I32`, `I64`, `I128`, `I256`
108/// - Type selection is automatic based on the required precision
109///
110/// # Examples
111///
112/// ```
113/// use vortex_array::arrays::DecimalArray;
114/// use vortex_dtype::DecimalDType;
115/// use vortex_buffer::{buffer, Buffer};
116/// use vortex_array::validity::Validity;
117///
118/// // Create a decimal array with precision=5, scale=2 (e.g., 123.45)
119/// let decimal_dtype = DecimalDType::new(5, 2);
120/// let values = buffer![12345i32, 67890i32, -12300i32]; // 123.45, 678.90, -123.00
121/// let array = DecimalArray::new(values, decimal_dtype, Validity::NonNullable);
122///
123/// assert_eq!(array.precision(), 5);
124/// assert_eq!(array.scale(), 2);
125/// assert_eq!(array.len(), 3);
126/// ```
127#[derive(Clone, Debug)]
128pub struct DecimalArray {
129    dtype: DType,
130    values: ByteBuffer,
131    values_type: DecimalValueType,
132    validity: Validity,
133    stats_set: ArrayStats,
134}
135
136impl DecimalArray {
137    /// Creates a new [`DecimalArray`] from a [`Buffer`] and [`Validity`], without checking
138    /// any invariants.
139    ///
140    /// # Panics
141    ///
142    /// Panics if the validity length is not compatible with the buffer length.
143    pub fn new<T: NativeDecimalType>(
144        buffer: Buffer<T>,
145        decimal_dtype: DecimalDType,
146        validity: Validity,
147    ) -> Self {
148        if let Some(len) = validity.maybe_len()
149            && buffer.len() != len
150        {
151            vortex_panic!(
152                "Buffer and validity length mismatch: buffer={}, validity={}",
153                buffer.len(),
154                len,
155            );
156        }
157
158        Self {
159            dtype: DType::Decimal(decimal_dtype, validity.nullability()),
160            values: buffer.into_byte_buffer(),
161            values_type: T::VALUES_TYPE,
162            validity,
163            stats_set: ArrayStats::default(),
164        }
165    }
166
167    /// Returns the underlying [`ByteBuffer`] of the array.
168    pub fn byte_buffer(&self) -> ByteBuffer {
169        self.values.clone()
170    }
171
172    pub fn buffer<T: NativeDecimalType>(&self) -> Buffer<T> {
173        if self.values_type != T::VALUES_TYPE {
174            vortex_panic!(
175                "Cannot extract Buffer<{:?}> for DecimalArray with values_type {:?}",
176                T::VALUES_TYPE,
177                self.values_type,
178            );
179        }
180        Buffer::<T>::from_byte_buffer(self.values.clone())
181    }
182
183    /// Returns the decimal type information
184    pub fn decimal_dtype(&self) -> DecimalDType {
185        match &self.dtype {
186            DType::Decimal(decimal_dtype, _) => *decimal_dtype,
187            _ => vortex_panic!("Expected Decimal dtype, got {:?}", self.dtype),
188        }
189    }
190
191    pub fn values_type(&self) -> DecimalValueType {
192        self.values_type
193    }
194
195    pub fn precision(&self) -> u8 {
196        self.decimal_dtype().precision()
197    }
198
199    pub fn scale(&self) -> i8 {
200        self.decimal_dtype().scale()
201    }
202
203    pub fn from_option_iter<T: NativeDecimalType, I: IntoIterator<Item = Option<T>>>(
204        iter: I,
205        decimal_dtype: DecimalDType,
206    ) -> Self {
207        let iter = iter.into_iter();
208        let mut values = BufferMut::with_capacity(iter.size_hint().0);
209        let mut validity = BooleanBufferBuilder::new(values.capacity());
210
211        for i in iter {
212            match i {
213                None => {
214                    validity.append(false);
215                    values.push(T::default());
216                }
217                Some(e) => {
218                    validity.append(true);
219                    values.push(e);
220                }
221            }
222        }
223        Self::new(
224            values.freeze(),
225            decimal_dtype,
226            Validity::from(validity.finish()),
227        )
228    }
229}
230
231impl ArrayVTable<DecimalVTable> for DecimalVTable {
232    fn len(array: &DecimalArray) -> usize {
233        let divisor = match array.values_type {
234            DecimalValueType::I8 => 1,
235            DecimalValueType::I16 => 2,
236            DecimalValueType::I32 => 4,
237            DecimalValueType::I64 => 8,
238            DecimalValueType::I128 => 16,
239            DecimalValueType::I256 => 32,
240            ty => vortex_panic!("unknown decimal value type {:?}", ty),
241        };
242        array.values.len() / divisor
243    }
244
245    fn dtype(array: &DecimalArray) -> &DType {
246        &array.dtype
247    }
248
249    fn stats(array: &DecimalArray) -> StatsSetRef<'_> {
250        array.stats_set.to_ref(array.as_ref())
251    }
252}
253
254impl VisitorVTable<DecimalVTable> for DecimalVTable {
255    fn visit_buffers(array: &DecimalArray, visitor: &mut dyn ArrayBufferVisitor) {
256        visitor.visit_buffer(&array.values);
257    }
258
259    fn visit_children(array: &DecimalArray, visitor: &mut dyn ArrayChildVisitor) {
260        visitor.visit_validity(array.validity(), array.len())
261    }
262}
263
264impl CanonicalVTable<DecimalVTable> for DecimalVTable {
265    fn canonicalize(array: &DecimalArray) -> VortexResult<Canonical> {
266        Ok(Canonical::Decimal(array.clone()))
267    }
268
269    fn append_to_builder(array: &DecimalArray, builder: &mut dyn ArrayBuilder) -> VortexResult<()> {
270        builder.extend_from_array(array.as_ref())
271    }
272}
273
274impl ValidityHelper for DecimalArray {
275    fn validity(&self) -> &Validity {
276        &self.validity
277    }
278}
279
#[cfg(test)]
mod test {
    use arrow_array::Decimal128Array;

    /// Reading a slot of an all-null Arrow `Decimal128Array` yields zero.
    #[test]
    fn test_decimal() {
        // The dtype information travels with the array; there is no other way to carry a
        // dtype except via the array.
        // NOTE(review): this exercises Arrow's `Decimal128Array` only, not `DecimalArray`.
        let all_null = Decimal128Array::new_null(100);
        let slot = all_null.value(10);
        assert_eq!(slot, 0i128);
    }
}