vortex_vector/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4// TODO(connor): Explain what vectors are, why we need them for the new operator model of arrays,
5// differences from Arrow (builders and arrays and scalars), etc.
6//! Immutable and mutable decompressed (canonical) vectors for Vortex.
7
8#![deny(clippy::missing_errors_doc)]
9#![deny(clippy::missing_panics_doc)]
10#![deny(clippy::missing_safety_doc)]
11#![deny(missing_docs)]
12
13pub mod binaryview;
14pub mod bool;
15pub mod decimal;
16pub mod fixed_size_list;
17pub mod listview;
18pub mod null;
19pub mod primitive;
20pub mod struct_;
21
22mod datum;
23mod scalar;
24mod scalar_ops;
25mod vector;
26mod vector_mut;
27mod vector_ops;
28
29pub use datum::*;
30pub use scalar::Scalar;
31pub use scalar_ops::ScalarOps;
32pub use vector::Vector;
33pub use vector_mut::VectorMut;
34pub use vector_ops::VectorMutOps;
35pub use vector_ops::VectorOps;
36use vortex_dtype::DType;
37
38mod macros;
39mod private;
40mod scalar_macros;
41
42/// Returns true if the datum is compatible with the provided data type.
43pub fn datum_matches_dtype(datum: &Datum, dtype: &DType) -> bool {
44    match datum {
45        Datum::Scalar(scalar) => scalar_matches_dtype(scalar, dtype),
46        Datum::Vector(vector) => vector_matches_dtype(vector, dtype),
47    }
48}
49
50/// Returns true if the vector is compatible with the provided data type.
51///
52/// This means that the vector's physical representation is compatible with the data type,
53/// typically meaning the enum variants match. In the case of nested types, this function
54/// recursively checks the child types.
55///
56/// This function also checks that if the data type is non-nullable, the vector contains no nulls,
57pub fn vector_matches_dtype(vector: &Vector, dtype: &DType) -> bool {
58    if !dtype.is_nullable() && vector.validity().false_count() > 0 {
59        // Non-nullable dtype cannot have nulls in the vector.
60        return false;
61    }
62
63    // Note that we don't match a tuple here to make sure we have an exhaustive match that will
64    // fail to compile if we ever add new DTypes.
65    match dtype {
66        DType::Null => {
67            matches!(vector, Vector::Null(_))
68        }
69        DType::Bool(_) => {
70            matches!(vector, Vector::Bool(_))
71        }
72        DType::Primitive(ptype, _) => match vector {
73            Vector::Primitive(v) => ptype == &v.ptype(),
74            _ => false,
75        },
76        DType::Decimal(dec_type, _) => match vector {
77            Vector::Decimal(v) => {
78                dec_type.precision() == v.precision() && dec_type.scale() == v.scale()
79            }
80            _ => false,
81        },
82        DType::Utf8(_) => {
83            matches!(vector, Vector::String(_))
84        }
85        DType::Binary(_) => {
86            matches!(vector, Vector::Binary(_))
87        }
88        DType::List(elements, _) => match vector {
89            Vector::List(v) => vector_matches_dtype(v.elements(), elements.as_ref()),
90            _ => false,
91        },
92        DType::FixedSizeList(elements, size, _) => match vector {
93            Vector::FixedSizeList(v) => {
94                v.list_size() == *size && vector_matches_dtype(v.elements(), elements.as_ref())
95            }
96            _ => false,
97        },
98        DType::Struct(fields, _) => match vector {
99            Vector::Struct(v) => {
100                if fields.nfields() != v.fields().len() {
101                    return false;
102                }
103                for (field_dtype, field_vector) in fields.fields().zip(v.fields().iter()) {
104                    if !vector_matches_dtype(field_vector, &field_dtype) {
105                        return false;
106                    }
107                }
108                true
109            }
110            _ => false,
111        },
112        DType::Extension(ext_dtype) => {
113            // For extension types, we check the storage type.
114            vector_matches_dtype(vector, ext_dtype.storage_dtype())
115        }
116    }
117}
118
119/// Returns true if the scalar's is compatible with the provided data type.
120pub fn scalar_matches_dtype(scalar: &Scalar, dtype: &DType) -> bool {
121    if !dtype.is_nullable() && scalar.is_null() {
122        // Non-nullable dtype cannot have nulls in the scalar.
123        return false;
124    }
125
126    // Note that we don't match a tuple here to make sure we have an exhaustive match that will
127    // fail to compile if we ever add new DTypes.
128    match dtype {
129        DType::Null => {
130            matches!(scalar, Scalar::Null(_))
131        }
132        DType::Bool(_) => {
133            matches!(scalar, Scalar::Bool(_))
134        }
135        DType::Primitive(ptype, _) => match scalar {
136            Scalar::Primitive(s) => ptype == &s.ptype(),
137            _ => false,
138        },
139        DType::Decimal(dec_type, _) => match scalar {
140            Scalar::Decimal(s) => {
141                dec_type.precision() == s.precision() && dec_type.scale() == s.scale()
142            }
143            _ => false,
144        },
145        DType::Utf8(_) => {
146            matches!(scalar, Scalar::String(_))
147        }
148        DType::Binary(_) => {
149            matches!(scalar, Scalar::Binary(_))
150        }
151        DType::List(_, _) => match scalar {
152            Scalar::List(s) => vector_matches_dtype(&Vector::from(s.value().clone()), dtype),
153            _ => false,
154        },
155        DType::FixedSizeList(..) => match scalar {
156            Scalar::FixedSizeList(s) => {
157                vector_matches_dtype(&Vector::from(s.value().clone()), dtype)
158            }
159            _ => false,
160        },
161        DType::Struct(_, _) => match scalar {
162            Scalar::Struct(s) => vector_matches_dtype(&Vector::from(s.value().clone()), dtype),
163            _ => false,
164        },
165        DType::Extension(ext_dtype) => {
166            // For extension types, we check the storage type.
167            scalar_matches_dtype(scalar, ext_dtype.storage_dtype())
168        }
169    }
170}