vortex_array/arrays/struct_/
array.rs1use std::iter::once;
5use std::sync::Arc;
6
7use vortex_error::VortexExpect;
8use vortex_error::VortexResult;
9use vortex_error::vortex_err;
10
11use crate::ArrayRef;
12use crate::IntoArray;
13use crate::array::Array;
14use crate::array::ArrayParts;
15use crate::array::EmptyArrayData;
16use crate::array::TypedArrayRef;
17use crate::array::child_to_validity;
18use crate::array::validity_to_child;
19use crate::arrays::Struct;
20use crate::dtype::DType;
21use crate::dtype::FieldName;
22use crate::dtype::FieldNames;
23use crate::dtype::StructFields;
24use crate::validity::Validity;
25
26pub(super) const VALIDITY_SLOT: usize = 0;
29pub(super) const FIELDS_OFFSET: usize = 1;
31
/// Owned components of a struct array, as produced by `Array::<Struct>::into_data_parts`.
pub struct StructDataParts {
    /// Struct dtype description (field names and field dtypes) of the source array.
    pub struct_fields: StructFields,
    /// Field arrays cloned out of the array's field slots, in slot order.
    pub fields: Arc<[ArrayRef]>,
    /// Struct-level validity of the source array.
    pub validity: Validity,
}
157
158pub(super) fn make_struct_slots(
159 fields: &[ArrayRef],
160 validity: &Validity,
161 length: usize,
162) -> Vec<Option<ArrayRef>> {
163 once(validity_to_child(validity, length))
164 .chain(fields.iter().cloned().map(Some))
165 .collect()
166}
167
168pub trait StructArrayExt: TypedArrayRef<Struct> {
169 fn nullability(&self) -> crate::dtype::Nullability {
170 match self.as_ref().dtype() {
171 DType::Struct(_, nullability) => *nullability,
172 _ => unreachable!("StructArrayExt requires a struct dtype"),
173 }
174 }
175
176 fn names(&self) -> &FieldNames {
177 self.as_ref().dtype().as_struct_fields().names()
178 }
179
180 fn struct_validity(&self) -> Validity {
181 child_to_validity(&self.as_ref().slots()[VALIDITY_SLOT], self.nullability())
182 }
183
184 fn iter_unmasked_fields(&self) -> impl Iterator<Item = &ArrayRef> + '_ {
185 self.as_ref().slots()[FIELDS_OFFSET..]
186 .iter()
187 .map(|s| s.as_ref().vortex_expect("StructArray field slot"))
188 }
189
190 fn unmasked_fields(&self) -> Arc<[ArrayRef]> {
191 self.iter_unmasked_fields().cloned().collect()
192 }
193
194 fn unmasked_field(&self, idx: usize) -> &ArrayRef {
195 self.as_ref().slots()[FIELDS_OFFSET + idx]
196 .as_ref()
197 .vortex_expect("StructArray field slot")
198 }
199
200 fn unmasked_field_by_name_opt(&self, name: impl AsRef<str>) -> Option<&ArrayRef> {
201 let name = name.as_ref();
202 self.struct_fields()
203 .find(name)
204 .map(|idx| self.unmasked_field(idx))
205 }
206
207 fn unmasked_field_by_name(&self, name: impl AsRef<str>) -> VortexResult<&ArrayRef> {
208 let name = name.as_ref();
209 self.unmasked_field_by_name_opt(name).ok_or_else(|| {
210 vortex_err!(
211 "Field {name} not found in struct array with names {:?}",
212 self.names()
213 )
214 })
215 }
216
217 fn struct_fields(&self) -> &StructFields {
218 self.as_ref().dtype().as_struct_fields()
219 }
220}
221impl<T: TypedArrayRef<Struct>> StructArrayExt for T {}
222
223impl Array<Struct> {
224 pub fn new(
226 names: FieldNames,
227 fields: impl Into<Arc<[ArrayRef]>>,
228 length: usize,
229 validity: Validity,
230 ) -> Self {
231 Self::try_new(names, fields, length, validity)
232 .vortex_expect("StructArray construction failed")
233 }
234
235 pub fn try_new(
237 names: FieldNames,
238 fields: impl Into<Arc<[ArrayRef]>>,
239 length: usize,
240 validity: Validity,
241 ) -> VortexResult<Self> {
242 let fields = fields.into();
243 let field_dtypes: Vec<_> = fields.iter().map(|d| d.dtype().clone()).collect();
244 let dtype = StructFields::new(names, field_dtypes);
245 let slots = make_struct_slots(&fields, &validity, length);
246 Array::try_from_parts(
247 ArrayParts::new(
248 Struct,
249 DType::Struct(dtype, validity.nullability()),
250 length,
251 EmptyArrayData,
252 )
253 .with_slots(slots),
254 )
255 }
256
257 pub unsafe fn new_unchecked(
263 fields: impl Into<Arc<[ArrayRef]>>,
264 dtype: StructFields,
265 length: usize,
266 validity: Validity,
267 ) -> Self {
268 let fields = fields.into();
269 let outer_dtype = DType::Struct(dtype, validity.nullability());
270 let slots = make_struct_slots(&fields, &validity, length);
271 unsafe {
272 Array::from_parts_unchecked(
273 ArrayParts::new(Struct, outer_dtype, length, EmptyArrayData).with_slots(slots),
274 )
275 }
276 }
277
278 pub fn try_new_with_dtype(
280 fields: impl Into<Arc<[ArrayRef]>>,
281 dtype: StructFields,
282 length: usize,
283 validity: Validity,
284 ) -> VortexResult<Self> {
285 let fields = fields.into();
286 let outer_dtype = DType::Struct(dtype, validity.nullability());
287 let slots = make_struct_slots(&fields, &validity, length);
288 Array::try_from_parts(
289 ArrayParts::new(Struct, outer_dtype, length, EmptyArrayData).with_slots(slots),
290 )
291 }
292
293 pub fn from_fields<N: AsRef<str>>(items: &[(N, ArrayRef)]) -> VortexResult<Self> {
295 Self::try_from_iter(items.iter().map(|(a, b)| (a, b.clone())))
296 }
297
298 pub fn try_from_iter_with_validity<
300 N: AsRef<str>,
301 A: IntoArray,
302 T: IntoIterator<Item = (N, A)>,
303 >(
304 iter: T,
305 validity: Validity,
306 ) -> VortexResult<Self> {
307 let (names, fields): (Vec<FieldName>, Vec<ArrayRef>) = iter
308 .into_iter()
309 .map(|(name, fields)| (FieldName::from(name.as_ref()), fields.into_array()))
310 .unzip();
311 let len = fields
312 .first()
313 .map(|f| f.len())
314 .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
315
316 Self::try_new(FieldNames::from_iter(names), fields, len, validity)
317 }
318
319 pub fn try_from_iter<N: AsRef<str>, A: IntoArray, T: IntoIterator<Item = (N, A)>>(
321 iter: T,
322 ) -> VortexResult<Self> {
323 let (names, fields): (Vec<FieldName>, Vec<ArrayRef>) = iter
324 .into_iter()
325 .map(|(name, field)| (FieldName::from(name.as_ref()), field.into_array()))
326 .unzip();
327 let len = fields
328 .first()
329 .map(ArrayRef::len)
330 .ok_or_else(|| vortex_err!("StructArray cannot be constructed from an empty slice of arrays because the length is unspecified"))?;
331
332 Self::try_new(
333 FieldNames::from_iter(names),
334 fields,
335 len,
336 Validity::NonNullable,
337 )
338 }
339
340 pub fn project(&self, projection: &[FieldName]) -> VortexResult<Self> {
348 let mut children = Vec::with_capacity(projection.len());
349 let mut names = Vec::with_capacity(projection.len());
350
351 for f_name in projection {
352 let idx = self
353 .struct_fields()
354 .find(f_name.as_ref())
355 .ok_or_else(|| vortex_err!("Unknown field {f_name}"))?;
356
357 names.push(self.names()[idx].clone());
358 children.push(self.unmasked_field(idx).clone());
359 }
360
361 Self::try_new(
362 FieldNames::from(names.as_slice()),
363 children,
364 self.len(),
365 self.validity()?,
366 )
367 }
368
369 pub fn new_fieldless_with_len(len: usize) -> Self {
371 let dtype = DType::Struct(
372 StructFields::new(FieldNames::default(), Vec::new()),
373 crate::dtype::Nullability::NonNullable,
374 );
375 let slots = make_struct_slots(&[], &Validity::NonNullable, len);
376 unsafe {
377 Array::from_parts_unchecked(
378 ArrayParts::new(Struct, dtype, len, EmptyArrayData).with_slots(slots),
379 )
380 }
381 }
382
383 pub fn into_data_parts(self) -> StructDataParts {
385 let fields: Arc<[ArrayRef]> = self.slots()[FIELDS_OFFSET..]
386 .iter()
387 .map(|s| s.as_ref().vortex_expect("StructArray field slot").clone())
388 .collect();
389 let validity = self.validity().vortex_expect("StructArray validity");
390 StructDataParts {
391 struct_fields: self.struct_fields().clone(),
392 fields,
393 validity,
394 }
395 }
396
397 pub fn remove_column(&self, name: impl Into<FieldName>) -> Option<(Self, ArrayRef)> {
398 let name = name.into();
399 let struct_dtype = self.struct_fields();
400 let len = self.len();
401
402 let position = struct_dtype.find(name.as_ref())?;
403
404 let slot_position = FIELDS_OFFSET + position;
405 let field = self.slots()[slot_position]
406 .as_ref()
407 .vortex_expect("StructArray field slot")
408 .clone();
409 let new_slots: Vec<Option<ArrayRef>> = self
410 .slots()
411 .iter()
412 .enumerate()
413 .filter(|(i, _)| *i != slot_position)
414 .map(|(_, s)| s.clone())
415 .collect();
416
417 let new_dtype = struct_dtype.without_field(position).ok()?;
418 let new_array = unsafe {
419 Array::from_parts_unchecked(
420 ArrayParts::new(
421 Struct,
422 DType::Struct(new_dtype, self.dtype().nullability()),
423 len,
424 EmptyArrayData,
425 )
426 .with_slots(new_slots),
427 )
428 };
429 Some((new_array, field))
430 }
431}
432
433impl Array<Struct> {
434 pub fn with_column(&self, name: impl Into<FieldName>, array: ArrayRef) -> VortexResult<Self> {
435 let name = name.into();
436 let struct_dtype = self.struct_fields();
437
438 let names = struct_dtype.names().iter().cloned().chain(once(name));
439 let types = struct_dtype.fields().chain(once(array.dtype().clone()));
440 let new_fields = StructFields::new(names.collect(), types.collect());
441
442 let children: Arc<[ArrayRef]> = self.slots()[FIELDS_OFFSET..]
443 .iter()
444 .map(|s| s.as_ref().vortex_expect("StructArray field slot").clone())
445 .chain(once(array))
446 .collect();
447
448 Self::try_new_with_dtype(children, new_fields, self.len(), self.validity()?)
449 }
450
451 pub fn remove_column_owned(&self, name: impl Into<FieldName>) -> Option<(Self, ArrayRef)> {
452 self.remove_column(name)
453 }
454}