vortex_array/scalar_fn/fns/
pack.rs1use std::fmt::Display;
5use std::fmt::Formatter;
6use std::hash::Hash;
7use std::sync::Arc;
8
9use itertools::Itertools as _;
10use prost::Message;
11use vortex_error::VortexResult;
12use vortex_proto::expr as pb;
13use vortex_session::VortexSession;
14use vortex_session::registry::CachedId;
15
16use crate::ArrayRef;
17use crate::ExecutionCtx;
18use crate::IntoArray;
19use crate::arrays::StructArray;
20use crate::dtype::DType;
21use crate::dtype::FieldName;
22use crate::dtype::FieldNames;
23use crate::dtype::Nullability;
24use crate::dtype::StructFields;
25use crate::expr::Expression;
26use crate::expr::lit;
27use crate::scalar_fn::Arity;
28use crate::scalar_fn::ChildName;
29use crate::scalar_fn::ExecutionArgs;
30use crate::scalar_fn::ScalarFnId;
31use crate::scalar_fn::ScalarFnVTable;
32use crate::validity::Validity;
33
/// Scalar function that packs its child expressions into a struct with named fields.
///
/// The field names and the nullability of the resulting struct are carried by
/// [`PackOptions`].
#[derive(Clone, Debug)]
pub struct Pack;
37
38#[derive(Debug, Clone, PartialEq, Eq, Hash)]
39pub struct PackOptions {
40 pub names: FieldNames,
41 pub nullability: Nullability,
42}
43
44impl Display for PackOptions {
45 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
46 write!(
47 f,
48 "names: [{}], nullability: {:#}",
49 self.names.iter().join(", "),
50 self.nullability
51 )
52 }
53}
54
55impl ScalarFnVTable for Pack {
56 type Options = PackOptions;
57
58 fn id(&self) -> ScalarFnId {
59 static ID: CachedId = CachedId::new("vortex.pack");
60 *ID
61 }
62
63 fn serialize(&self, instance: &Self::Options) -> VortexResult<Option<Vec<u8>>> {
64 Ok(Some(
65 pb::PackOpts {
66 paths: instance.names.iter().map(|n| n.to_string()).collect(),
67 nullable: instance.nullability.into(),
68 }
69 .encode_to_vec(),
70 ))
71 }
72
73 fn deserialize(
74 &self,
75 _metadata: &[u8],
76 _session: &VortexSession,
77 ) -> VortexResult<Self::Options> {
78 let opts = pb::PackOpts::decode(_metadata)?;
79 let names: FieldNames = opts
80 .paths
81 .iter()
82 .map(|name| FieldName::from(name.as_str()))
83 .collect();
84 Ok(PackOptions {
85 names,
86 nullability: opts.nullable.into(),
87 })
88 }
89
90 fn arity(&self, options: &Self::Options) -> Arity {
91 Arity::Exact(options.names.len())
92 }
93
94 fn child_name(&self, instance: &Self::Options, child_idx: usize) -> ChildName {
95 match instance.names.get(child_idx) {
96 Some(name) => ChildName::from(Arc::clone(name.inner())),
97 None => unreachable!(
98 "Invalid child index {} for Pack expression with {} fields",
99 child_idx,
100 instance.names.len()
101 ),
102 }
103 }
104
105 fn fmt_sql(
106 &self,
107 options: &Self::Options,
108 expr: &Expression,
109 f: &mut Formatter<'_>,
110 ) -> std::fmt::Result {
111 write!(f, "pack(")?;
112 for (i, (name, child)) in options.names.iter().zip(expr.children().iter()).enumerate() {
113 write!(f, "{}: ", name)?;
114 child.fmt_sql(f)?;
115 if i + 1 < options.names.len() {
116 write!(f, ", ")?;
117 }
118 }
119 write!(f, "){}", options.nullability)
120 }
121
122 fn return_dtype(&self, options: &Self::Options, arg_dtypes: &[DType]) -> VortexResult<DType> {
123 Ok(DType::Struct(
124 StructFields::new(options.names.clone(), arg_dtypes.to_vec()),
125 options.nullability,
126 ))
127 }
128
129 fn validity(
130 &self,
131 _options: &Self::Options,
132 _expression: &Expression,
133 ) -> VortexResult<Option<Expression>> {
134 Ok(Some(lit(true)))
135 }
136
137 fn execute(
138 &self,
139 options: &Self::Options,
140 args: &dyn ExecutionArgs,
141 ctx: &mut ExecutionCtx,
142 ) -> VortexResult<ArrayRef> {
143 let len = args.row_count();
144 let value_arrays: Vec<ArrayRef> = (0..args.num_inputs())
145 .map(|i| args.get(i))
146 .collect::<VortexResult<_>>()?;
147 let validity: Validity = options.nullability.into();
148 StructArray::try_new(options.names.clone(), value_arrays, len, validity)?
149 .into_array()
150 .execute(ctx)
151 }
152
153 fn is_null_sensitive(&self, _instance: &Self::Options) -> bool {
155 true
156 }
157
158 fn is_fallible(&self, _instance: &Self::Options) -> bool {
159 false
160 }
161}
162
#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;
    use vortex_error::VortexResult;
    use vortex_error::vortex_bail;

    use super::Pack;
    use super::PackOptions;
    use crate::ArrayRef;
    use crate::IntoArray;
    #[expect(deprecated)]
    use crate::ToCanonical as _;
    use crate::arrays::PrimitiveArray;
    use crate::arrays::struct_::StructArrayExt;
    use crate::assert_arrays_eq;
    use crate::dtype::Nullability;
    use crate::expr::col;
    use crate::expr::pack;
    use crate::scalar_fn::ScalarFnVTableExt;
    use crate::scalar_fn::fns::pack::StructArray;
    use crate::validity::Validity;

    /// Builds a two-column struct array: `a = [0, 1, 2]`, `b = [4, 5, 6]`.
    fn test_array() -> ArrayRef {
        let fields = [
            ("a", buffer![0, 1, 2].into_array()),
            ("b", buffer![4, 5, 6].into_array()),
        ];
        StructArray::from_fields(&fields).unwrap().into_array()
    }

    /// Walks `field_path` through nested structs and returns the final field as a
    /// primitive array. Fails if `field_path` is empty.
    fn primitive_field(array: &ArrayRef, field_path: &[&str]) -> VortexResult<PrimitiveArray> {
        let Some((first, rest)) = field_path.split_first() else {
            vortex_bail!("empty field path");
        };

        #[expect(deprecated)]
        let mut current = array.to_struct().unmasked_field_by_name(first)?.clone();
        for name in rest {
            #[expect(deprecated)]
            let child = current.to_struct().unmasked_field_by_name(name)?.clone();
            current = child;
        }
        #[expect(deprecated)]
        let primitive = current.to_primitive();
        Ok(primitive)
    }

    /// Packing zero children yields a field-less struct with the input's length.
    #[test]
    pub fn test_empty_pack() {
        let options = PackOptions {
            names: Default::default(),
            nullability: Default::default(),
        };
        let expr = Pack.new_expr(options, []);

        let input = test_array();
        let expected_len = input.len();
        let packed = input.apply(&expr).unwrap();
        assert_eq!(packed.len(), expected_len);
        #[expect(deprecated)]
        let field_count = packed.to_struct().struct_fields().nfields();
        assert_eq!(field_count, 0);
    }

    /// Packing columns under new names keeps their values and is non-nullable.
    #[test]
    pub fn test_simple_pack() {
        let options = PackOptions {
            names: ["one", "two", "three"].into(),
            nullability: Nullability::NonNullable,
        };
        let expr = Pack.new_expr(options, [col("a"), col("b"), col("a")]);

        #[expect(deprecated)]
        let packed_struct = test_array().apply(&expr).unwrap().to_struct();

        assert_eq!(packed_struct.names(), ["one", "two", "three"]);
        assert!(matches!(packed_struct.validity(), Ok(Validity::NonNullable)));

        let packed = packed_struct.into_array();
        assert_arrays_eq!(
            primitive_field(&packed, &["one"]).unwrap(),
            PrimitiveArray::from_iter([0i32, 1, 2])
        );
        assert_arrays_eq!(
            primitive_field(&packed, &["two"]).unwrap(),
            PrimitiveArray::from_iter([4i32, 5, 6])
        );
        assert_arrays_eq!(
            primitive_field(&packed, &["three"]).unwrap(),
            PrimitiveArray::from_iter([0i32, 1, 2])
        );
    }

    /// A pack expression can itself be a child of another pack expression.
    #[test]
    pub fn test_nested_pack() {
        let inner = Pack.new_expr(
            PackOptions {
                names: ["two_one", "two_two"].into(),
                nullability: Nullability::NonNullable,
            },
            [col("b"), col("b")],
        );
        let outer = Pack.new_expr(
            PackOptions {
                names: ["one", "two", "three"].into(),
                nullability: Nullability::NonNullable,
            },
            [col("a"), inner, col("a")],
        );

        #[expect(deprecated)]
        let packed_struct = test_array().apply(&outer).unwrap().to_struct();

        assert_eq!(packed_struct.names(), ["one", "two", "three"]);

        let packed = packed_struct.into_array();
        assert_arrays_eq!(
            primitive_field(&packed, &["one"]).unwrap(),
            PrimitiveArray::from_iter([0i32, 1, 2])
        );
        assert_arrays_eq!(
            primitive_field(&packed, &["two", "two_one"]).unwrap(),
            PrimitiveArray::from_iter([4i32, 5, 6])
        );
        assert_arrays_eq!(
            primitive_field(&packed, &["two", "two_two"]).unwrap(),
            PrimitiveArray::from_iter([4i32, 5, 6])
        );
        assert_arrays_eq!(
            primitive_field(&packed, &["three"]).unwrap(),
            PrimitiveArray::from_iter([0i32, 1, 2])
        );
    }

    /// A nullable pack produces a struct whose validity is `AllValid`.
    #[test]
    pub fn test_pack_nullable() {
        let options = PackOptions {
            names: ["one", "two", "three"].into(),
            nullability: Nullability::Nullable,
        };
        let expr = Pack.new_expr(options, [col("a"), col("b"), col("a")]);

        #[expect(deprecated)]
        let packed_struct = test_array().apply(&expr).unwrap().to_struct();

        assert_eq!(packed_struct.names(), ["one", "two", "three"]);
        assert!(matches!(packed_struct.validity(), Ok(Validity::AllValid)));
    }

    /// The textual form lists `name: child` pairs and marks nullable packs with `?`.
    #[test]
    pub fn test_display() {
        let non_nullable = pack(
            [("id", col("user_id")), ("name", col("username"))],
            Nullability::NonNullable,
        );
        assert_eq!(
            non_nullable.to_string(),
            "pack(id: $.user_id, name: $.username)"
        );

        let nullable_options = PackOptions {
            names: ["x", "y"].into(),
            nullability: Nullability::Nullable,
        };
        let nullable = Pack.new_expr(nullable_options, [col("a"), col("b")]);
        assert_eq!(nullable.to_string(), "pack(x: $.a, y: $.b)?");
    }
}