vortex_array/scalar_fns/get_item/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use prost::Message;
5use vortex_dtype::DType;
6use vortex_dtype::FieldName;
7use vortex_dtype::FieldPath;
8use vortex_dtype::Nullability;
9use vortex_error::VortexResult;
10use vortex_error::vortex_err;
11use vortex_proto::expr as pb;
12use vortex_vector::Datum;
13use vortex_vector::ScalarOps;
14use vortex_vector::VectorOps;
15
16use crate::expr::Expression;
17use crate::expr::StatsCatalog;
18use crate::expr::functions::ArgName;
19use crate::expr::functions::Arity;
20use crate::expr::functions::ExecutionArgs;
21use crate::expr::functions::FunctionId;
22use crate::expr::functions::VTable;
23use crate::expr::stats::Stat;
24
25pub struct GetItemFn;
26impl VTable for GetItemFn {
27    type Options = FieldName;
28
29    fn id(&self) -> FunctionId {
30        FunctionId::from("vortex.get_item")
31    }
32
33    fn serialize(&self, field_name: &FieldName) -> VortexResult<Option<Vec<u8>>> {
34        Ok(Some(
35            pb::GetItemOpts {
36                path: field_name.to_string(),
37            }
38            .encode_to_vec(),
39        ))
40    }
41
42    fn deserialize(&self, bytes: &[u8]) -> VortexResult<Self::Options> {
43        let opts = pb::GetItemOpts::decode(bytes)?;
44        Ok(FieldName::from(opts.path))
45    }
46
47    fn arity(&self, _field_name: &FieldName) -> Arity {
48        Arity::Exact(1)
49    }
50
51    fn arg_name(&self, _field_name: &FieldName, _arg_idx: usize) -> ArgName {
52        ArgName::from("input")
53    }
54
55    fn stat_expression(
56        &self,
57        field_name: &FieldName,
58        _expr: &Expression,
59        stat: Stat,
60        catalog: &dyn StatsCatalog,
61    ) -> Option<Expression> {
62        // TODO(ngates): I think we can do better here and support stats over nested fields.
63        //  It would be nice if delegating to our child would return a struct of statistics
64        //  matching the nested DType such that we can write:
65        //    `get_item(expr.child(0).stat_expression(...), expr.data().field_name())`
66
67        // TODO(ngates): this is a bug whereby we may return stats for a nested field of the same
68        //  name as a field in the root struct. This should be resolved with upcoming change to
69        //  falsify expressions, but for now I'm preserving the existing buggy behavior.
70        catalog.stats_ref(&FieldPath::from_name(field_name.clone()), stat)
71    }
72
73    fn return_dtype(&self, field_name: &FieldName, arg_types: &[DType]) -> VortexResult<DType> {
74        let struct_dtype = &arg_types[0];
75        let field_dtype = struct_dtype
76            .as_struct_fields_opt()
77            .and_then(|st| st.field(field_name))
78            .ok_or_else(|| {
79                vortex_err!("Couldn't find the {} field in the input scope", field_name)
80            })?;
81
82        // Match here to avoid cloning the dtype if nullability doesn't need to change
83        if matches!(
84            (struct_dtype.nullability(), field_dtype.nullability()),
85            (Nullability::Nullable, Nullability::NonNullable)
86        ) {
87            return Ok(field_dtype.with_nullability(Nullability::Nullable));
88        }
89
90        Ok(field_dtype)
91    }
92
93    fn execute(&self, field_name: &FieldName, args: &ExecutionArgs) -> VortexResult<Datum> {
94        let struct_dtype = args
95            .input_type(0)
96            .as_struct_fields_opt()
97            .ok_or_else(|| vortex_err!("Expected struct dtype for child of GetItem expression"))?;
98        let field_idx = struct_dtype
99            .find(field_name)
100            .ok_or_else(|| vortex_err!("Field {} not found in struct dtype", field_name))?;
101
102        match args.input_datums(0) {
103            Datum::Scalar(s) => {
104                let mut field = s.as_struct().field(field_idx);
105                field.mask_validity(s.is_valid());
106                Ok(Datum::Scalar(field))
107            }
108            Datum::Vector(v) => {
109                let mut field = v.as_struct().fields()[field_idx].clone();
110                field.mask_validity(v.validity());
111                Ok(Datum::Vector(field))
112            }
113        }
114    }
115}