vortex_array/arrays/decimal/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4mod compute;
5mod narrow;
6mod ops;
7mod patch;
8mod serde;
9
10use arrow_buffer::BooleanBufferBuilder;
11use vortex_buffer::{Buffer, BufferMut, ByteBuffer};
12use vortex_dtype::{DType, DecimalDType};
13use vortex_error::{VortexExpect, VortexResult, vortex_ensure, vortex_panic};
14use vortex_scalar::{DecimalValueType, NativeDecimalType};
15
16pub use crate::arrays::decimal::narrow::narrowed_decimal;
17use crate::builders::ArrayBuilder;
18use crate::stats::{ArrayStats, StatsSetRef};
19use crate::validity::Validity;
20use crate::vtable::{
21    ArrayVTable, CanonicalVTable, NotSupported, VTable, ValidityHelper,
22    ValidityVTableFromValidityHelper, VisitorVTable,
23};
24use crate::{ArrayBufferVisitor, ArrayChildVisitor, Canonical, EncodingId, EncodingRef, vtable};
25
// Invokes the crate's `vtable!` macro for the `Decimal` encoding.
// NOTE(review): presumably this is what declares `DecimalVTable` (implemented below) and the
// related glue types — confirm against the macro definition.
vtable!(Decimal);
27
28impl VTable for DecimalVTable {
29    type Array = DecimalArray;
30    type Encoding = DecimalEncoding;
31
32    type ArrayVTable = Self;
33    type CanonicalVTable = Self;
34    type OperationsVTable = Self;
35    type ValidityVTable = ValidityVTableFromValidityHelper;
36    type VisitorVTable = Self;
37    type ComputeVTable = NotSupported;
38    type EncodeVTable = NotSupported;
39    type PipelineVTable = NotSupported;
40    type SerdeVTable = Self;
41
42    fn id(_encoding: &Self::Encoding) -> EncodingId {
43        EncodingId::new_ref("vortex.decimal")
44    }
45
46    fn encoding(_array: &Self::Array) -> EncodingRef {
47        EncodingRef::new_ref(DecimalEncoding.as_ref())
48    }
49}
50
/// Marker type for the decimal encoding; it carries no data.
///
/// It is the `Encoding` associated type of `DecimalVTable`, whose encoding id is
/// `"vortex.decimal"`.
#[derive(Clone, Debug)]
pub struct DecimalEncoding;
53
54/// Maps a decimal precision into the smallest type that can represent it.
55pub fn smallest_storage_type(decimal_dtype: &DecimalDType) -> DecimalValueType {
56    match decimal_dtype.precision() {
57        1..=2 => DecimalValueType::I8,
58        3..=4 => DecimalValueType::I16,
59        5..=9 => DecimalValueType::I32,
60        10..=18 => DecimalValueType::I64,
61        19..=38 => DecimalValueType::I128,
62        39..=76 => DecimalValueType::I256,
63        0 => unreachable!("precision must be greater than 0"),
64        p => unreachable!("precision larger than 76 is invalid found precision {p}"),
65    }
66}
67
68/// True if `value_type` can represent every value of the type `dtype`.
69pub fn compatible_storage_type(value_type: DecimalValueType, dtype: DecimalDType) -> bool {
70    value_type >= smallest_storage_type(&dtype)
71}
72
/// A decimal array that stores fixed-precision decimal numbers with configurable scale.
///
/// This mirrors the Apache Arrow Decimal encoding. Values are kept as exact scaled integers,
/// which suits financial and scientific data where floating-point precision loss is
/// unacceptable.
///
/// ## Storage Format
///
/// Decimals are stored as scaled integers in a supported scalar value type.
///
/// The narrowest scalar type able to hold every value of a given precision (see
/// [`smallest_storage_type`]) is:
/// - **i8**: precision 1-2 digits
/// - **i16**: precision 3-4 digits
/// - **i32**: precision 5-9 digits
/// - **i64**: precision 10-18 digits
/// - **i128**: precision 19-38 digits
/// - **i256**: precision 39-76 digits
///
/// These are only the maximal ranges for each scalar type; it is perfectly legal to store
/// values in a type that does not match this table exactly. For example, a valid
/// `DecimalArray` with precision=39 may store its values in an `i8` if all of the actual
/// values fit into it.
///
/// Similarly, a `DecimalArray` can be built that stores a set of precision=2 values in a
/// `Buffer<i256>`.
///
/// ## Precision and Scale
///
/// - **Precision**: Total number of significant digits (1-76, stored as a `u8`)
/// - **Scale**: Number of digits after the decimal point (-128 to 127, stored as an `i8`)
/// - **Value**: `stored_integer / 10^scale`
///
/// For example, with precision=5 and scale=2:
/// - Stored value 12345 represents 123.45
/// - Range: -999.99 to 999.99
///
/// ## Valid Scalar Types
///
/// The underlying storage uses one of these native types:
/// - `DecimalValueType::I8`, `I16`, `I32`, `I64`, `I128`, `I256`
/// - The storage type is not chosen from the precision: it is taken from the element type of
///   the buffer handed to the constructor.
///
/// # Examples
///
/// ```
/// use vortex_array::arrays::DecimalArray;
/// use vortex_dtype::DecimalDType;
/// use vortex_buffer::{buffer, Buffer};
/// use vortex_array::validity::Validity;
///
/// // Create a decimal array with precision=5, scale=2 (e.g., 123.45)
/// let decimal_dtype = DecimalDType::new(5, 2);
/// let values = buffer![12345i32, 67890i32, -12300i32]; // 123.45, 678.90, -123.00
/// let array = DecimalArray::new(values, decimal_dtype, Validity::NonNullable);
///
/// assert_eq!(array.precision(), 5);
/// assert_eq!(array.scale(), 2);
/// assert_eq!(array.len(), 3);
/// ```
#[derive(Clone, Debug)]
pub struct DecimalArray {
    // Always a `DType::Decimal`; its nullability is derived from `validity` at construction.
    dtype: DType,
    // The typed value buffer, held as untyped bytes.
    values: ByteBuffer,
    // Records which native integer type the bytes in `values` encode.
    values_type: DecimalValueType,
    // Which entries are null.
    validity: Validity,
    // Statistics for this array; starts out as the default value at construction.
    stats_set: ArrayStats,
}
138
139impl DecimalArray {
140    /// Creates a new [`DecimalArray`].
141    ///
142    /// # Panics
143    ///
144    /// Panics if the provided components do not satisfy the invariants documented in
145    /// [`DecimalArray::new_unchecked`].
146    pub fn new<T: NativeDecimalType>(
147        buffer: Buffer<T>,
148        decimal_dtype: DecimalDType,
149        validity: Validity,
150    ) -> Self {
151        Self::try_new(buffer, decimal_dtype, validity)
152            .vortex_expect("DecimalArray construction failed")
153    }
154
155    /// Constructs a new `DecimalArray`.
156    ///
157    /// See [`DecimalArray::new_unchecked`] for more information.
158    ///
159    /// # Errors
160    ///
161    /// Returns an error if the provided components do not satisfy the invariants documented in
162    /// [`DecimalArray::new_unchecked`].
163    pub fn try_new<T: NativeDecimalType>(
164        buffer: Buffer<T>,
165        decimal_dtype: DecimalDType,
166        validity: Validity,
167    ) -> VortexResult<Self> {
168        Self::validate(&buffer, &validity)?;
169
170        // SAFETY: validate ensures all invariants are met.
171        Ok(unsafe { Self::new_unchecked(buffer, decimal_dtype, validity) })
172    }
173
174    /// Creates a new [`DecimalArray`] without validation from these components:
175    ///
176    /// * `buffer` is a typed buffer containing the decimal values.
177    /// * `decimal_dtype` specifies the decimal precision and scale.
178    /// * `validity` holds the null values.
179    ///
180    /// # Safety
181    ///
182    /// The caller must ensure all of the following invariants are satisfied:
183    ///
184    /// - All non-null values in `buffer` must be representable within the specified precision.
185    /// - For example, with precision=5 and scale=2, all values must be in range [-999.99, 999.99].
186    /// - If `validity` is [`Validity::Array`], its length must exactly equal `buffer.len()`.
187    pub unsafe fn new_unchecked<T: NativeDecimalType>(
188        buffer: Buffer<T>,
189        decimal_dtype: DecimalDType,
190        validity: Validity,
191    ) -> Self {
192        Self {
193            values: buffer.into_byte_buffer(),
194            values_type: T::VALUES_TYPE,
195            dtype: DType::Decimal(decimal_dtype, validity.nullability()),
196            validity,
197            stats_set: Default::default(),
198        }
199    }
200
201    /// Validates the components that would be used to create a [`DecimalArray`].
202    ///
203    /// This function checks all the invariants required by [`DecimalArray::new_unchecked`].
204    pub(crate) fn validate<T: NativeDecimalType>(
205        buffer: &Buffer<T>,
206        validity: &Validity,
207    ) -> VortexResult<()> {
208        if let Some(len) = validity.maybe_len() {
209            vortex_ensure!(
210                buffer.len() == len,
211                "Buffer and validity length mismatch: buffer={}, validity={}",
212                buffer.len(),
213                len,
214            );
215        }
216
217        Ok(())
218    }
219
220    /// Returns the underlying [`ByteBuffer`] of the array.
221    pub fn byte_buffer(&self) -> ByteBuffer {
222        self.values.clone()
223    }
224
225    pub fn buffer<T: NativeDecimalType>(&self) -> Buffer<T> {
226        if self.values_type != T::VALUES_TYPE {
227            vortex_panic!(
228                "Cannot extract Buffer<{:?}> for DecimalArray with values_type {:?}",
229                T::VALUES_TYPE,
230                self.values_type,
231            );
232        }
233        Buffer::<T>::from_byte_buffer(self.values.clone())
234    }
235
236    /// Returns the decimal type information
237    pub fn decimal_dtype(&self) -> DecimalDType {
238        if let DType::Decimal(decimal_dtype, _) = self.dtype {
239            decimal_dtype
240        } else {
241            vortex_panic!("Expected Decimal dtype, got {:?}", self.dtype)
242        }
243    }
244
245    pub fn values_type(&self) -> DecimalValueType {
246        self.values_type
247    }
248
249    pub fn precision(&self) -> u8 {
250        self.decimal_dtype().precision()
251    }
252
253    pub fn scale(&self) -> i8 {
254        self.decimal_dtype().scale()
255    }
256
257    pub fn from_option_iter<T: NativeDecimalType, I: IntoIterator<Item = Option<T>>>(
258        iter: I,
259        decimal_dtype: DecimalDType,
260    ) -> Self {
261        let iter = iter.into_iter();
262        let mut values = BufferMut::with_capacity(iter.size_hint().0);
263        let mut validity = BooleanBufferBuilder::new(values.capacity());
264
265        for i in iter {
266            match i {
267                None => {
268                    validity.append(false);
269                    values.push(T::default());
270                }
271                Some(e) => {
272                    validity.append(true);
273                    values.push(e);
274                }
275            }
276        }
277        Self::new(
278            values.freeze(),
279            decimal_dtype,
280            Validity::from(validity.finish()),
281        )
282    }
283}
284
285impl ArrayVTable<DecimalVTable> for DecimalVTable {
286    fn len(array: &DecimalArray) -> usize {
287        let divisor = match array.values_type {
288            DecimalValueType::I8 => 1,
289            DecimalValueType::I16 => 2,
290            DecimalValueType::I32 => 4,
291            DecimalValueType::I64 => 8,
292            DecimalValueType::I128 => 16,
293            DecimalValueType::I256 => 32,
294            ty => vortex_panic!("unknown decimal value type {:?}", ty),
295        };
296        array.values.len() / divisor
297    }
298
299    fn dtype(array: &DecimalArray) -> &DType {
300        &array.dtype
301    }
302
303    fn stats(array: &DecimalArray) -> StatsSetRef<'_> {
304        array.stats_set.to_ref(array.as_ref())
305    }
306}
307
308impl VisitorVTable<DecimalVTable> for DecimalVTable {
309    fn visit_buffers(array: &DecimalArray, visitor: &mut dyn ArrayBufferVisitor) {
310        visitor.visit_buffer(&array.values);
311    }
312
313    fn visit_children(array: &DecimalArray, visitor: &mut dyn ArrayChildVisitor) {
314        visitor.visit_validity(array.validity(), array.len())
315    }
316}
317
318impl CanonicalVTable<DecimalVTable> for DecimalVTable {
319    fn canonicalize(array: &DecimalArray) -> Canonical {
320        Canonical::Decimal(array.clone())
321    }
322
323    fn append_to_builder(array: &DecimalArray, builder: &mut dyn ArrayBuilder) {
324        builder.extend_from_array(array.as_ref())
325    }
326}
327
328impl ValidityHelper for DecimalArray {
329    fn validity(&self) -> &Validity {
330        &self.validity
331    }
332}
333
#[cfg(test)]
mod test {
    use arrow_array::Decimal128Array;

    #[test]
    fn test_decimal() {
        // Reading the raw value of a slot in an all-null Arrow `Decimal128Array` yields zero.
        // NOTE(review): the original note here said the DType carries the decimal information
        // and can only travel via the array — the intent is unclear, confirm.
        let all_null = Decimal128Array::new_null(100);
        assert_eq!(all_null.value(10), 0i128);
    }
}
346}