1use std::hash::Hash;
5
6use itertools::Itertools as _;
7use vortex_array::arrays::StructArray;
8use vortex_array::validity::Validity;
9use vortex_array::{ArrayRef, DeserializeMetadata, IntoArray, ProstMetadata};
10use vortex_dtype::{DType, FieldName, FieldNames, Nullability, StructFields};
11use vortex_error::{VortexExpect as _, VortexResult, vortex_bail, vortex_err};
12use vortex_proto::expr as pb;
13
14use crate::display::{DisplayAs, DisplayFormat};
15use crate::{AnalysisExpr, ExprEncodingRef, ExprId, ExprRef, IntoExpr, Scope, VTable, vtable};
16
// Invokes the crate-level `vtable!` macro for `Pack`.
// NOTE(review): presumably this generates the `PackVTable` type implemented below and the glue
// that lets `PackExpr` be used as an `ExprRef` — confirm against the macro definition.
vtable!(Pack);
18
19#[allow(clippy::derived_hash_with_manual_eq)]
48#[derive(Debug, Clone, PartialEq, Eq, Hash)]
49pub struct PackExpr {
50 names: FieldNames,
51 values: Vec<ExprRef>,
52 nullability: Nullability,
53}
54
/// Zero-sized marker type used as the `VTable::Encoding` of pack expressions.
pub struct PackExprEncoding;
56
57impl VTable for PackVTable {
58 type Expr = PackExpr;
59 type Encoding = PackExprEncoding;
60 type Metadata = ProstMetadata<pb::PackOpts>;
61
62 fn id(_encoding: &Self::Encoding) -> ExprId {
63 ExprId::new_ref("pack")
64 }
65
66 fn encoding(_expr: &Self::Expr) -> ExprEncodingRef {
67 ExprEncodingRef::new_ref(PackExprEncoding.as_ref())
68 }
69
70 fn metadata(expr: &Self::Expr) -> Option<Self::Metadata> {
71 Some(ProstMetadata(pb::PackOpts {
72 paths: expr.names.iter().map(|n| n.to_string()).collect(),
73 nullable: expr.nullability.into(),
74 }))
75 }
76
77 fn children(expr: &Self::Expr) -> Vec<&ExprRef> {
78 expr.values.iter().collect()
79 }
80
81 fn with_children(expr: &Self::Expr, children: Vec<ExprRef>) -> VortexResult<Self::Expr> {
82 PackExpr::try_new(expr.names.clone(), children, expr.nullability)
83 }
84
85 fn build(
86 _encoding: &Self::Encoding,
87 metadata: &<Self::Metadata as DeserializeMetadata>::Output,
88 children: Vec<ExprRef>,
89 ) -> VortexResult<Self::Expr> {
90 if children.len() != metadata.paths.len() {
91 vortex_bail!(
92 "Pack expression expects {} children, got {}",
93 metadata.paths.len(),
94 children.len()
95 );
96 }
97 let names: FieldNames = metadata
98 .paths
99 .iter()
100 .map(|name| FieldName::from(name.as_str()))
101 .collect();
102 PackExpr::try_new(names, children, metadata.nullable.into())
103 }
104
105 fn evaluate(expr: &Self::Expr, scope: &Scope) -> VortexResult<ArrayRef> {
106 let len = scope.len();
107 let value_arrays = expr
108 .values
109 .iter()
110 .map(|value_expr| value_expr.unchecked_evaluate(scope))
111 .process_results(|it| it.collect::<Vec<_>>())?;
112 let validity = match expr.nullability {
113 Nullability::NonNullable => Validity::NonNullable,
114 Nullability::Nullable => Validity::AllValid,
115 };
116 Ok(StructArray::try_new(expr.names.clone(), value_arrays, len, validity)?.into_array())
117 }
118
119 fn return_dtype(expr: &Self::Expr, scope: &DType) -> VortexResult<DType> {
120 let value_dtypes = expr
121 .values
122 .iter()
123 .map(|value_expr| value_expr.return_dtype(scope))
124 .process_results(|it| it.collect())?;
125 Ok(DType::Struct(
126 StructFields::new(expr.names.clone(), value_dtypes),
127 expr.nullability,
128 ))
129 }
130}
131
132impl PackExpr {
133 pub fn try_new(
134 names: FieldNames,
135 values: Vec<ExprRef>,
136 nullability: Nullability,
137 ) -> VortexResult<Self> {
138 if names.len() != values.len() {
139 vortex_bail!("length mismatch {} {}", names.len(), values.len());
140 }
141 Ok(PackExpr {
142 names,
143 values,
144 nullability,
145 })
146 }
147
148 pub fn try_new_expr(
149 names: FieldNames,
150 values: Vec<ExprRef>,
151 nullability: Nullability,
152 ) -> VortexResult<ExprRef> {
153 Self::try_new(names, values, nullability).map(|v| v.into_expr())
154 }
155
156 pub fn names(&self) -> &FieldNames {
157 &self.names
158 }
159
160 pub fn field(&self, field_name: &FieldName) -> VortexResult<ExprRef> {
161 let idx = self
162 .names
163 .iter()
164 .position(|name| name == field_name)
165 .ok_or_else(|| {
166 vortex_err!(
167 "Cannot find field {} in pack fields {:?}",
168 field_name,
169 self.names
170 )
171 })?;
172
173 self.values
174 .get(idx)
175 .cloned()
176 .ok_or_else(|| vortex_err!("field index out of bounds: {}", idx))
177 }
178
179 pub fn nullability(&self) -> Nullability {
180 self.nullability
181 }
182}
183
184pub fn pack(
192 elements: impl IntoIterator<Item = (impl Into<FieldName>, ExprRef)>,
193 nullability: Nullability,
194) -> ExprRef {
195 let (names, values): (Vec<_>, Vec<_>) = elements
196 .into_iter()
197 .map(|(name, value)| (name.into(), value))
198 .unzip();
199 PackExpr::try_new(names.into(), values, nullability)
200 .vortex_expect("pack names and values have the same length")
201 .into_expr()
202}
203
204impl DisplayAs for PackExpr {
205 fn fmt_as(&self, df: DisplayFormat, f: &mut std::fmt::Formatter) -> std::fmt::Result {
206 match df {
207 DisplayFormat::Compact => {
208 write!(
209 f,
210 "pack({}){}",
211 self.names
212 .iter()
213 .zip(&self.values)
214 .format_with(", ", |(name, expr), f| f(&format_args!("{name}: {expr}"))),
215 self.nullability
216 )
217 }
218 DisplayFormat::Tree => {
219 write!(f, "Pack")
220 }
221 }
222 }
223
224 fn child_names(&self) -> Option<Vec<String>> {
225 Some(self.names.iter().map(|n| n.to_string()).collect())
226 }
227}
228
// `PackExpr` provides no `AnalysisExpr` overrides; the empty impl relies entirely on whatever
// defaults the trait defines.
impl AnalysisExpr for PackExpr {}
230
#[cfg(test)]
mod tests {

    use vortex_array::arrays::{PrimitiveArray, StructArray};
    use vortex_array::validity::Validity;
    use vortex_array::vtable::ValidityHelper;
    use vortex_array::{Array, ArrayRef, IntoArray, ToCanonical};
    use vortex_buffer::buffer;
    use vortex_dtype::{FieldNames, Nullability};
    use vortex_error::{VortexResult, vortex_bail};

    use crate::{IntoExpr, PackExpr, Scope, col, pack};

    /// Fixture: a three-row struct array with columns `a = [0, 1, 2]` and `b = [4, 5, 6]`.
    fn test_array() -> ArrayRef {
        let columns = [
            ("a", buffer![0, 1, 2].into_array()),
            ("b", buffer![4, 5, 6].into_array()),
        ];
        StructArray::from_fields(&columns).unwrap().into_array()
    }

    /// Walks `field_path` through nested struct arrays and returns the leaf as a primitive
    /// array. Errors on an empty path or a missing field.
    fn primitive_field(array: &dyn Array, field_path: &[&str]) -> VortexResult<PrimitiveArray> {
        let Some((first, rest)) = field_path.split_first() else {
            vortex_bail!("empty field path");
        };

        let mut current = array.to_struct()?.field_by_name(first)?.clone();
        for segment in rest {
            current = current.to_struct()?.field_by_name(segment)?.clone();
        }
        Ok(current.to_primitive().unwrap())
    }

    // Packing zero fields still yields one (empty) struct per input row.
    #[test]
    pub fn test_empty_pack() {
        let input = test_array();
        let expr =
            PackExpr::try_new(FieldNames::default(), Vec::new(), Nullability::NonNullable).unwrap();

        let packed = expr.evaluate(&Scope::new(input.clone())).unwrap();
        assert_eq!(packed.len(), input.len());

        let field_count = packed.to_struct().unwrap().struct_fields().nfields();
        assert_eq!(field_count, 0);
    }

    // Columns can be renamed and reused when packed into a flat struct.
    #[test]
    pub fn test_simple_pack() {
        let expr = PackExpr::try_new(
            ["one", "two", "three"].into(),
            vec![col("a"), col("b"), col("a")],
            Nullability::NonNullable,
        )
        .unwrap();

        let packed = expr
            .evaluate(&Scope::new(test_array()))
            .unwrap()
            .to_struct()
            .unwrap();

        assert_eq!(packed.names(), ["one", "two", "three"]);
        assert_eq!(packed.validity(), &Validity::NonNullable);

        let expected_columns = [
            ("one", [0, 1, 2]),
            ("two", [4, 5, 6]),
            ("three", [0, 1, 2]),
        ];
        for (field, expected) in expected_columns {
            let column = primitive_field(packed.as_ref(), &[field]).unwrap();
            assert_eq!(column.as_slice::<i32>(), expected);
        }
    }

    // A pack expression can itself be a field of another pack expression.
    #[test]
    pub fn test_nested_pack() {
        let inner = PackExpr::try_new(
            ["two_one", "two_two"].into(),
            vec![col("b"), col("b")],
            Nullability::NonNullable,
        )
        .unwrap()
        .into_expr();

        let expr = PackExpr::try_new(
            ["one", "two", "three"].into(),
            vec![col("a"), inner, col("a")],
            Nullability::NonNullable,
        )
        .unwrap();

        let packed = expr
            .evaluate(&Scope::new(test_array()))
            .unwrap()
            .to_struct()
            .unwrap();

        assert_eq!(packed.names(), ["one", "two", "three"]);

        let column = |path: &[&str]| primitive_field(packed.as_ref(), path).unwrap();

        assert_eq!(column(&["one"]).as_slice::<i32>(), [0, 1, 2]);
        assert_eq!(column(&["two", "two_one"]).as_slice::<i32>(), [4, 5, 6]);
        assert_eq!(column(&["two", "two_two"]).as_slice::<i32>(), [4, 5, 6]);
        assert_eq!(column(&["three"]).as_slice::<i32>(), [0, 1, 2]);
    }

    // A nullable pack reports `AllValid` validity on the resulting struct.
    #[test]
    pub fn test_pack_nullable() {
        let expr = PackExpr::try_new(
            ["one", "two", "three"].into(),
            vec![col("a"), col("b"), col("a")],
            Nullability::Nullable,
        )
        .unwrap();

        let packed = expr
            .evaluate(&Scope::new(test_array()))
            .unwrap()
            .to_struct()
            .unwrap();

        assert_eq!(packed.names(), ["one", "two", "three"]);
        assert_eq!(packed.validity(), &Validity::AllValid);
    }

    // Compact display lists `name: expr` pairs and appends `?` for nullable packs.
    #[test]
    pub fn test_display() {
        let non_nullable = pack(
            [("id", col("user_id")), ("name", col("username"))],
            Nullability::NonNullable,
        );
        assert_eq!(
            non_nullable.to_string(),
            "pack(id: $.user_id, name: $.username)"
        );

        let nullable = PackExpr::try_new(
            ["x", "y"].into(),
            vec![col("a"), col("b")],
            Nullability::Nullable,
        )
        .unwrap();
        assert_eq!(nullable.to_string(), "pack(x: $.a, y: $.b)?");
    }
}