vortex_vector/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4// TODO(connor): Explain what vectors are, why we need them for the new operator model of arrays,
5// differences from Arrow (builders and arrays and scalars), etc.
6//! Immutable and mutable decompressed (canonical) vectors for Vortex.
7
8#![deny(missing_docs)]
9#![deny(clippy::missing_errors_doc)]
10#![deny(clippy::missing_panics_doc)]
11#![deny(clippy::missing_safety_doc)]
12
13pub mod binaryview;
14pub mod bool;
15pub mod decimal;
16pub mod fixed_size_list;
17pub mod listview;
18pub mod null;
19pub mod primitive;
20pub mod struct_;
21
22mod datum;
23mod scalar;
24mod scalar_ops;
25mod vector;
26mod vector_mut;
27mod vector_ops;
28
29pub use datum::Datum;
30pub use scalar::Scalar;
31pub use scalar_ops::ScalarOps;
32pub use vector::Vector;
33pub use vector_mut::VectorMut;
34pub use vector_ops::{VectorMutOps, VectorOps};
35use vortex_dtype::DType;
36
37mod macros;
38mod private;
39mod scalar_macros;
40
41/// Returns true if the vector's is compatible with the provided data type.
42///
43/// This means that the vector's physical representation is compatible with the data type,
44/// typically meaning the enum variants match. In the case of nested types, this function
45/// recursively checks the child types.
46///
47/// This function also checks that if the data type is non-nullable, the vector contains no nulls,
48pub fn vector_matches_dtype(vector: &Vector, dtype: &DType) -> bool {
49    if !dtype.is_nullable() && vector.validity().false_count() > 0 {
50        // Non-nullable dtype cannot have nulls in the vector.
51        return false;
52    }
53
54    // Note that we don't match a tuple here to make sure we have an exhaustive match that will
55    // fail to compile if we ever add new DTypes.
56    match dtype {
57        DType::Null => {
58            matches!(vector, Vector::Null(_))
59        }
60        DType::Bool(_) => {
61            matches!(vector, Vector::Bool(_))
62        }
63        DType::Primitive(ptype, _) => match vector {
64            Vector::Primitive(v) => ptype == &v.ptype(),
65            _ => false,
66        },
67        DType::Decimal(dec_type, _) => match vector {
68            Vector::Decimal(v) => {
69                dec_type.precision() == v.precision() && dec_type.scale() == v.scale()
70            }
71            _ => false,
72        },
73        DType::Utf8(_) => {
74            matches!(vector, Vector::String(_))
75        }
76        DType::Binary(_) => {
77            matches!(vector, Vector::Binary(_))
78        }
79        DType::List(elements, _) => match vector {
80            Vector::List(v) => vector_matches_dtype(v.elements(), elements.as_ref()),
81            _ => false,
82        },
83        DType::FixedSizeList(elements, size, _) => match vector {
84            Vector::FixedSizeList(v) => {
85                v.element_size() == *size && vector_matches_dtype(v.elements(), elements.as_ref())
86            }
87            _ => false,
88        },
89        DType::Struct(fields, _) => match vector {
90            Vector::Struct(v) => {
91                if fields.nfields() != v.fields().len() {
92                    return false;
93                }
94                for (field_dtype, field_vector) in fields.fields().zip(v.fields().iter()) {
95                    if !vector_matches_dtype(field_vector, &field_dtype) {
96                        return false;
97                    }
98                }
99                true
100            }
101            _ => false,
102        },
103        DType::Extension(ext_dtype) => {
104            // For extension types, we check the storage type.
105            vector_matches_dtype(vector, ext_dtype.storage_dtype())
106        }
107    }
108}