Skip to main content

datafusion_common/
nested_struct.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::error::{_plan_err, Result};
19use arrow::{
20    array::{Array, ArrayRef, StructArray, new_null_array},
21    compute::{CastOptions, cast_with_options},
22    datatypes::{DataType, DataType::Struct, Field, FieldRef},
23};
24use std::{collections::HashSet, sync::Arc};
25
26/// Cast a struct column to match target struct fields, handling nested structs recursively.
27///
28/// This function implements struct-to-struct casting with the assumption that **structs should
29/// always be allowed to cast to other structs**. However, the source column must already be
30/// a struct type - non-struct sources will result in an error.
31///
32/// ## Field Matching Strategy
33/// - **By Name**: Source struct fields are matched to target fields by name (case-sensitive)
34/// - **No Positional Mapping**: Structs with no overlapping field names are rejected
35/// - **Type Adaptation**: When a matching field is found, it is recursively cast to the target field's type
36/// - **Missing Fields**: Target fields not present in the source are filled with null values
37/// - **Extra Fields**: Source fields not present in the target are ignored
38///
39/// ## Nested Struct Handling
40/// - Nested structs are handled recursively using the same casting rules
41/// - Each level of nesting follows the same field matching and null-filling strategy
42/// - This allows for complex struct transformations while maintaining data integrity
43///
44/// # Arguments
45/// * `source_col` - The source array to cast (must be a struct array)
46/// * `target_fields` - The target struct field definitions to cast to
47///
48/// # Returns
49/// A `Result<ArrayRef>` containing the cast struct array
50///
51/// # Errors
52/// Returns a `DataFusionError::Plan` if the source column is not a struct type
53fn cast_struct_column(
54    source_col: &ArrayRef,
55    target_fields: &[Arc<Field>],
56    cast_options: &CastOptions,
57) -> Result<ArrayRef> {
58    if source_col.data_type() == &DataType::Null
59        || (!source_col.is_empty() && source_col.null_count() == source_col.len())
60    {
61        return Ok(new_null_array(
62            &Struct(target_fields.to_vec().into()),
63            source_col.len(),
64        ));
65    }
66
67    if let Some(source_struct) = source_col.as_any().downcast_ref::<StructArray>() {
68        let source_fields = source_struct.fields();
69        validate_struct_compatibility(source_fields, target_fields)?;
70        let mut fields: Vec<Arc<Field>> = Vec::with_capacity(target_fields.len());
71        let mut arrays: Vec<ArrayRef> = Vec::with_capacity(target_fields.len());
72        let num_rows = source_col.len();
73
74        // Iterate target fields and pick source child by name when present.
75        for target_child_field in target_fields.iter() {
76            fields.push(Arc::clone(target_child_field));
77
78            let source_child_opt =
79                source_struct.column_by_name(target_child_field.name());
80
81            match source_child_opt {
82                Some(source_child_col) => {
83                    let adapted_child =
84                        cast_column(source_child_col, target_child_field, cast_options)
85                            .map_err(|e| {
86                            e.context(format!(
87                                "While casting struct field '{}'",
88                                target_child_field.name()
89                            ))
90                        })?;
91                    arrays.push(adapted_child);
92                }
93                None => {
94                    arrays.push(new_null_array(target_child_field.data_type(), num_rows));
95                }
96            }
97        }
98
99        let struct_array =
100            StructArray::new(fields.into(), arrays, source_struct.nulls().cloned());
101        Ok(Arc::new(struct_array))
102    } else {
103        // Return error if source is not a struct type
104        _plan_err!(
105            "Cannot cast column of type {} to struct type. Source must be a struct to cast to struct.",
106            source_col.data_type()
107        )
108    }
109}
110
111/// Cast a column to match the target field type, with special handling for nested structs.
112///
113/// This function serves as the main entry point for column casting operations. For struct
114/// types, it enforces that **only struct columns can be cast to struct types**.
115///
116/// ## Casting Behavior
117/// - **Struct Types**: Delegates to `cast_struct_column` for struct-to-struct casting only
118/// - **Non-Struct Types**: Uses Arrow's standard `cast` function for primitive type conversions
119///
120/// ## Cast Options
121/// The `cast_options` argument controls how Arrow handles values that cannot be represented
122/// in the target type. When `safe` is `false` (DataFusion's default) the cast will return an
123/// error if such a value is encountered. Setting `safe` to `true` instead produces `NULL`
124/// for out-of-range or otherwise invalid values. The options also allow customizing how
125/// temporal values are formatted when cast to strings.
126///
127/// ```
128/// use arrow::array::{ArrayRef, Int64Array};
129/// use arrow::compute::CastOptions;
130/// use arrow::datatypes::{DataType, Field};
131/// use datafusion_common::nested_struct::cast_column;
132/// use std::sync::Arc;
133///
134/// let source: ArrayRef = Arc::new(Int64Array::from(vec![1, i64::MAX]));
135/// let target = Field::new("ints", DataType::Int32, true);
136/// // Permit lossy conversions by producing NULL on overflow instead of erroring
137/// let options = CastOptions {
138///     safe: true,
139///     ..Default::default()
140/// };
141/// let result = cast_column(&source, &target, &options).unwrap();
142/// assert!(result.is_null(1));
143/// ```
144///
145/// ## Struct Casting Requirements
146/// The struct casting logic requires that the source column must already be a struct type.
147/// This makes the function useful for:
148/// - Schema evolution scenarios where struct layouts change over time
149/// - Data migration between different struct schemas
150/// - Type-safe data processing pipelines that maintain struct type integrity
151///
152/// # Arguments
153/// * `source_col` - The source array to cast
154/// * `target_field` - The target field definition (including type and metadata)
155/// * `cast_options` - Options that govern strictness and formatting of the cast
156///
157/// # Returns
158/// A `Result<ArrayRef>` containing the cast array
159///
160/// # Errors
161/// Returns an error if:
162/// - Attempting to cast a non-struct column to a struct type
163/// - Arrow's cast function fails for non-struct types
164/// - Memory allocation fails during struct construction
165/// - Invalid data type combinations are encountered
166pub fn cast_column(
167    source_col: &ArrayRef,
168    target_field: &Field,
169    cast_options: &CastOptions,
170) -> Result<ArrayRef> {
171    match target_field.data_type() {
172        Struct(target_fields) => {
173            cast_struct_column(source_col, target_fields, cast_options)
174        }
175        _ => Ok(cast_with_options(
176            source_col,
177            target_field.data_type(),
178            cast_options,
179        )?),
180    }
181}
182
183/// Validates compatibility between source and target struct fields for casting operations.
184///
185/// This function implements comprehensive struct compatibility checking by examining:
186/// - Field name matching between source and target structs
187/// - Type castability for each matching field (including recursive struct validation)
188/// - Proper handling of missing fields (target fields not in source are allowed - filled with nulls)
189/// - Proper handling of extra fields (source fields not in target are allowed - ignored)
190///
191/// # Compatibility Rules
192/// - **Field Matching**: Fields are matched by name (case-sensitive)
193/// - **Missing Target Fields**: Allowed - will be filled with null values during casting
194/// - **Extra Source Fields**: Allowed - will be ignored during casting
195/// - **Type Compatibility**: Each matching field must be castable using Arrow's type system
196/// - **Nested Structs**: Recursively validates nested struct compatibility
197///
198/// # Arguments
199/// * `source_fields` - Fields from the source struct type
200/// * `target_fields` - Fields from the target struct type
201///
202/// # Returns
203/// * `Ok(())` if the structs are compatible for casting
204/// * `Err(DataFusionError)` with detailed error message if incompatible
205///
206/// # Examples
207/// ```text
208/// // Compatible: source has extra field, target has missing field
209/// // Source: {a: i32, b: string, c: f64}
210/// // Target: {a: i64, d: bool}
211/// // Result: Ok(()) - 'a' can cast i32->i64, 'b','c' ignored, 'd' filled with nulls
212///
213/// // Incompatible: matching field has incompatible types
214/// // Source: {a: string}
215/// // Target: {a: binary}
216/// // Result: Err(...) - string cannot cast to binary
217/// ```
218///
219pub fn validate_struct_compatibility(
220    source_fields: &[FieldRef],
221    target_fields: &[FieldRef],
222) -> Result<()> {
223    let has_overlap = has_one_of_more_common_fields(source_fields, target_fields);
224    if !has_overlap {
225        return _plan_err!(
226            "Cannot cast struct with {} fields to {} fields because there is no field name overlap",
227            source_fields.len(),
228            target_fields.len()
229        );
230    }
231
232    // Check compatibility for each target field
233    for target_field in target_fields {
234        // Look for matching field in source by name
235        if let Some(source_field) = source_fields
236            .iter()
237            .find(|f| f.name() == target_field.name())
238        {
239            validate_field_compatibility(source_field, target_field)?;
240        } else {
241            // Target field is missing from source
242            // If it's non-nullable, we cannot fill it with NULL
243            if !target_field.is_nullable() {
244                return _plan_err!(
245                    "Cannot cast struct: target field '{}' is non-nullable but missing from source. \
246                     Cannot fill with NULL.",
247                    target_field.name()
248                );
249            }
250        }
251    }
252
253    // Extra fields in source are OK - they'll be ignored
254    Ok(())
255}
256
257fn validate_field_compatibility(
258    source_field: &Field,
259    target_field: &Field,
260) -> Result<()> {
261    if source_field.data_type() == &DataType::Null {
262        // Validate that target allows nulls before returning early.
263        // It is invalid to cast a NULL source field to a non-nullable target field.
264        if !target_field.is_nullable() {
265            return _plan_err!(
266                "Cannot cast NULL struct field '{}' to non-nullable field '{}'",
267                source_field.name(),
268                target_field.name()
269            );
270        }
271        return Ok(());
272    }
273
274    // Ensure nullability is compatible. It is invalid to cast a nullable
275    // source field to a non-nullable target field as this may discard
276    // null values.
277    if source_field.is_nullable() && !target_field.is_nullable() {
278        return _plan_err!(
279            "Cannot cast nullable struct field '{}' to non-nullable field",
280            target_field.name()
281        );
282    }
283
284    // Check if the matching field types are compatible
285    match (source_field.data_type(), target_field.data_type()) {
286        // Recursively validate nested structs
287        (Struct(source_nested), Struct(target_nested)) => {
288            validate_struct_compatibility(source_nested, target_nested)?;
289        }
290        // For non-struct types, use the existing castability check
291        _ => {
292            if !arrow::compute::can_cast_types(
293                source_field.data_type(),
294                target_field.data_type(),
295            ) {
296                return _plan_err!(
297                    "Cannot cast struct field '{}' from type {} to type {}",
298                    target_field.name(),
299                    source_field.data_type(),
300                    target_field.data_type()
301                );
302            }
303        }
304    }
305
306    Ok(())
307}
308
309/// Check if two field lists have at least one common field by name.
310///
311/// This is useful for validating struct compatibility when casting between structs,
312/// ensuring that source and target fields have overlapping names.
313pub fn has_one_of_more_common_fields(
314    source_fields: &[FieldRef],
315    target_fields: &[FieldRef],
316) -> bool {
317    let source_names: HashSet<&str> = source_fields
318        .iter()
319        .map(|field| field.name().as_str())
320        .collect();
321    target_fields
322        .iter()
323        .any(|field| source_names.contains(field.name().as_str()))
324}
325
326#[cfg(test)]
327mod tests {
328
329    use super::*;
330    use crate::{assert_contains, format::DEFAULT_CAST_OPTIONS};
331    use arrow::{
332        array::{
333            BinaryArray, Int32Array, Int32Builder, Int64Array, ListArray, MapArray,
334            MapBuilder, NullArray, StringArray, StringBuilder,
335        },
336        buffer::NullBuffer,
337        datatypes::{DataType, Field, FieldRef, Int32Type},
338    };
339    /// Macro to extract and downcast a column from a StructArray
340    macro_rules! get_column_as {
341        ($struct_array:expr, $column_name:expr, $array_type:ty) => {
342            $struct_array
343                .column_by_name($column_name)
344                .unwrap()
345                .as_any()
346                .downcast_ref::<$array_type>()
347                .unwrap()
348        };
349    }
350
351    fn field(name: &str, data_type: DataType) -> Field {
352        Field::new(name, data_type, true)
353    }
354
355    fn non_null_field(name: &str, data_type: DataType) -> Field {
356        Field::new(name, data_type, false)
357    }
358
359    fn arc_field(name: &str, data_type: DataType) -> FieldRef {
360        Arc::new(field(name, data_type))
361    }
362
363    fn struct_type(fields: Vec<Field>) -> DataType {
364        Struct(fields.into())
365    }
366
367    fn struct_field(name: &str, fields: Vec<Field>) -> Field {
368        field(name, struct_type(fields))
369    }
370
371    fn arc_struct_field(name: &str, fields: Vec<Field>) -> FieldRef {
372        Arc::new(struct_field(name, fields))
373    }
374
375    #[test]
376    fn test_cast_simple_column() {
377        let source = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef;
378        let target_field = field("ints", DataType::Int64);
379        let result = cast_column(&source, &target_field, &DEFAULT_CAST_OPTIONS).unwrap();
380        let result = result.as_any().downcast_ref::<Int64Array>().unwrap();
381        assert_eq!(result.len(), 3);
382        assert_eq!(result.value(0), 1);
383        assert_eq!(result.value(1), 2);
384        assert_eq!(result.value(2), 3);
385    }
386
387    #[test]
388    fn test_cast_column_with_options() {
389        let source = Arc::new(Int64Array::from(vec![1, i64::MAX])) as ArrayRef;
390        let target_field = field("ints", DataType::Int32);
391
392        let safe_opts = CastOptions {
393            // safe: false - return Err for failure
394            safe: false,
395            ..DEFAULT_CAST_OPTIONS
396        };
397        assert!(cast_column(&source, &target_field, &safe_opts).is_err());
398
399        let unsafe_opts = CastOptions {
400            // safe: true - return Null for failure
401            safe: true,
402            ..DEFAULT_CAST_OPTIONS
403        };
404        let result = cast_column(&source, &target_field, &unsafe_opts).unwrap();
405        let result = result.as_any().downcast_ref::<Int32Array>().unwrap();
406        assert_eq!(result.value(0), 1);
407        assert!(result.is_null(1));
408    }
409
410    #[test]
411    fn test_cast_struct_with_missing_field() {
412        let a_array = Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef;
413        let source_struct = StructArray::from(vec![(
414            arc_field("a", DataType::Int32),
415            Arc::clone(&a_array),
416        )]);
417        let source_col = Arc::new(source_struct) as ArrayRef;
418
419        let target_field = struct_field(
420            "s",
421            vec![field("a", DataType::Int32), field("b", DataType::Utf8)],
422        );
423
424        let result =
425            cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS).unwrap();
426        let struct_array = result.as_any().downcast_ref::<StructArray>().unwrap();
427        assert_eq!(struct_array.fields().len(), 2);
428        let a_result = get_column_as!(&struct_array, "a", Int32Array);
429        assert_eq!(a_result.value(0), 1);
430        assert_eq!(a_result.value(1), 2);
431
432        let b_result = get_column_as!(&struct_array, "b", StringArray);
433        assert_eq!(b_result.len(), 2);
434        assert!(b_result.is_null(0));
435        assert!(b_result.is_null(1));
436    }
437
438    #[test]
439    fn test_cast_struct_source_not_struct() {
440        let source = Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef;
441        let target_field = struct_field("s", vec![field("a", DataType::Int32)]);
442
443        let result = cast_column(&source, &target_field, &DEFAULT_CAST_OPTIONS);
444        assert!(result.is_err());
445        let error_msg = result.unwrap_err().to_string();
446        assert!(error_msg.contains("Cannot cast column of type"));
447        assert!(error_msg.contains("to struct type"));
448        assert!(error_msg.contains("Source must be a struct"));
449    }
450
451    #[test]
452    fn test_cast_struct_incompatible_child_type() {
453        let a_array = Arc::new(BinaryArray::from(vec![
454            Some(b"a".as_ref()),
455            Some(b"b".as_ref()),
456        ])) as ArrayRef;
457        let source_struct =
458            StructArray::from(vec![(arc_field("a", DataType::Binary), a_array)]);
459        let source_col = Arc::new(source_struct) as ArrayRef;
460
461        let target_field = struct_field("s", vec![field("a", DataType::Int32)]);
462
463        let result = cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS);
464        assert!(result.is_err());
465        let error_msg = result.unwrap_err().to_string();
466        assert!(error_msg.contains("Cannot cast struct field 'a'"));
467    }
468
469    #[test]
470    fn test_validate_struct_compatibility_incompatible_types() {
471        // Source struct: {field1: Binary, field2: String}
472        let source_fields = vec![
473            arc_field("field1", DataType::Binary),
474            arc_field("field2", DataType::Utf8),
475        ];
476
477        // Target struct: {field1: Int32}
478        let target_fields = vec![arc_field("field1", DataType::Int32)];
479
480        let result = validate_struct_compatibility(&source_fields, &target_fields);
481        assert!(result.is_err());
482        let error_msg = result.unwrap_err().to_string();
483        assert!(error_msg.contains("Cannot cast struct field 'field1'"));
484        assert!(error_msg.contains("Binary"));
485        assert!(error_msg.contains("Int32"));
486    }
487
488    #[test]
489    fn test_validate_struct_compatibility_compatible_types() {
490        // Source struct: {field1: Int32, field2: String}
491        let source_fields = vec![
492            arc_field("field1", DataType::Int32),
493            arc_field("field2", DataType::Utf8),
494        ];
495
496        // Target struct: {field1: Int64} (Int32 can cast to Int64)
497        let target_fields = vec![arc_field("field1", DataType::Int64)];
498
499        let result = validate_struct_compatibility(&source_fields, &target_fields);
500        assert!(result.is_ok());
501    }
502
503    #[test]
504    fn test_validate_struct_compatibility_missing_field_in_source() {
505        // Source struct: {field1: Int32} (missing field2)
506        let source_fields = vec![arc_field("field1", DataType::Int32)];
507
508        // Target struct: {field1: Int32, field2: Utf8}
509        let target_fields = vec![
510            arc_field("field1", DataType::Int32),
511            arc_field("field2", DataType::Utf8),
512        ];
513
514        // Should be OK - missing fields will be filled with nulls
515        let result = validate_struct_compatibility(&source_fields, &target_fields);
516        assert!(result.is_ok());
517    }
518
519    #[test]
520    fn test_validate_struct_compatibility_additional_field_in_source() {
521        // Source struct: {field1: Int32, field2: String} (extra field2)
522        let source_fields = vec![
523            arc_field("field1", DataType::Int32),
524            arc_field("field2", DataType::Utf8),
525        ];
526
527        // Target struct: {field1: Int32}
528        let target_fields = vec![arc_field("field1", DataType::Int32)];
529
530        // Should be OK - extra fields in source are ignored
531        let result = validate_struct_compatibility(&source_fields, &target_fields);
532        assert!(result.is_ok());
533    }
534
535    #[test]
536    fn test_validate_struct_compatibility_no_overlap_mismatch_len() {
537        let source_fields = vec![
538            arc_field("left", DataType::Int32),
539            arc_field("right", DataType::Int32),
540        ];
541        let target_fields = vec![arc_field("alpha", DataType::Int32)];
542
543        let result = validate_struct_compatibility(&source_fields, &target_fields);
544        assert!(result.is_err());
545        let error_msg = result.unwrap_err().to_string();
546        assert_contains!(error_msg, "no field name overlap");
547    }
548
549    #[test]
550    fn test_cast_struct_parent_nulls_retained() {
551        let a_array = Arc::new(Int32Array::from(vec![Some(1), Some(2)])) as ArrayRef;
552        let fields = vec![arc_field("a", DataType::Int32)];
553        let nulls = Some(NullBuffer::from(vec![true, false]));
554        let source_struct = StructArray::new(fields.clone().into(), vec![a_array], nulls);
555        let source_col = Arc::new(source_struct) as ArrayRef;
556
557        let target_field = struct_field("s", vec![field("a", DataType::Int64)]);
558
559        let result =
560            cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS).unwrap();
561        let struct_array = result.as_any().downcast_ref::<StructArray>().unwrap();
562        assert_eq!(struct_array.null_count(), 1);
563        assert!(struct_array.is_valid(0));
564        assert!(struct_array.is_null(1));
565
566        let a_result = get_column_as!(&struct_array, "a", Int64Array);
567        assert_eq!(a_result.value(0), 1);
568        assert_eq!(a_result.value(1), 2);
569    }
570
571    #[test]
572    fn test_validate_struct_compatibility_nullable_to_non_nullable() {
573        // Source struct: {field1: Int32 nullable}
574        let source_fields = vec![arc_field("field1", DataType::Int32)];
575
576        // Target struct: {field1: Int32 non-nullable}
577        let target_fields = vec![Arc::new(non_null_field("field1", DataType::Int32))];
578
579        let result = validate_struct_compatibility(&source_fields, &target_fields);
580        assert!(result.is_err());
581        let error_msg = result.unwrap_err().to_string();
582        assert!(error_msg.contains("field1"));
583        assert!(error_msg.contains("non-nullable"));
584    }
585
586    #[test]
587    fn test_validate_struct_compatibility_non_nullable_to_nullable() {
588        // Source struct: {field1: Int32 non-nullable}
589        let source_fields = vec![Arc::new(non_null_field("field1", DataType::Int32))];
590
591        // Target struct: {field1: Int32 nullable}
592        let target_fields = vec![arc_field("field1", DataType::Int32)];
593
594        let result = validate_struct_compatibility(&source_fields, &target_fields);
595        assert!(result.is_ok());
596    }
597
598    #[test]
599    fn test_validate_struct_compatibility_nested_nullable_to_non_nullable() {
600        // Source struct: {field1: {nested: Int32 nullable}}
601        let source_fields = vec![Arc::new(non_null_field(
602            "field1",
603            struct_type(vec![field("nested", DataType::Int32)]),
604        ))];
605
606        // Target struct: {field1: {nested: Int32 non-nullable}}
607        let target_fields = vec![Arc::new(non_null_field(
608            "field1",
609            struct_type(vec![non_null_field("nested", DataType::Int32)]),
610        ))];
611
612        let result = validate_struct_compatibility(&source_fields, &target_fields);
613        assert!(result.is_err());
614        let error_msg = result.unwrap_err().to_string();
615        assert!(error_msg.contains("nested"));
616        assert!(error_msg.contains("non-nullable"));
617    }
618
619    #[test]
620    fn test_validate_struct_compatibility_by_name() {
621        // Source struct: {field1: Int32, field2: String}
622        let source_fields = vec![
623            arc_field("field1", DataType::Int32),
624            arc_field("field2", DataType::Utf8),
625        ];
626
627        // Target struct: {field2: String, field1: Int64}
628        let target_fields = vec![
629            arc_field("field2", DataType::Utf8),
630            arc_field("field1", DataType::Int64),
631        ];
632
633        let result = validate_struct_compatibility(&source_fields, &target_fields);
634        assert!(result.is_ok());
635    }
636
637    #[test]
638    fn test_validate_struct_compatibility_by_name_with_type_mismatch() {
639        // Source struct: {field1: Binary}
640        let source_fields = vec![arc_field("field1", DataType::Binary)];
641
642        // Target struct: {field1: Int32} (incompatible type)
643        let target_fields = vec![arc_field("field1", DataType::Int32)];
644
645        let result = validate_struct_compatibility(&source_fields, &target_fields);
646        assert!(result.is_err());
647        let error_msg = result.unwrap_err().to_string();
648        assert_contains!(
649            error_msg,
650            "Cannot cast struct field 'field1' from type Binary to type Int32"
651        );
652    }
653
654    #[test]
655    fn test_validate_struct_compatibility_no_overlap_equal_len() {
656        let source_fields = vec![
657            arc_field("left", DataType::Int32),
658            arc_field("right", DataType::Utf8),
659        ];
660
661        let target_fields = vec![
662            arc_field("alpha", DataType::Int32),
663            arc_field("beta", DataType::Utf8),
664        ];
665
666        let result = validate_struct_compatibility(&source_fields, &target_fields);
667        assert!(result.is_err());
668        let error_msg = result.unwrap_err().to_string();
669        assert_contains!(error_msg, "no field name overlap");
670    }
671
672    #[test]
673    fn test_validate_struct_compatibility_mixed_name_overlap() {
674        // Source struct: {a: Int32, b: String, extra: Boolean}
675        let source_fields = vec![
676            arc_field("a", DataType::Int32),
677            arc_field("b", DataType::Utf8),
678            arc_field("extra", DataType::Boolean),
679        ];
680
681        // Target struct: {b: String, a: Int64, c: Float32}
682        // Name overlap with a and b, missing c (nullable)
683        let target_fields = vec![
684            arc_field("b", DataType::Utf8),
685            arc_field("a", DataType::Int64),
686            arc_field("c", DataType::Float32),
687        ];
688
689        let result = validate_struct_compatibility(&source_fields, &target_fields);
690        assert!(result.is_ok());
691    }
692
693    #[test]
694    fn test_validate_struct_compatibility_by_name_missing_required_field() {
695        // Source struct: {field1: Int32} (missing field2)
696        let source_fields = vec![arc_field("field1", DataType::Int32)];
697
698        // Target struct: {field1: Int32, field2: Int32 non-nullable}
699        let target_fields = vec![
700            arc_field("field1", DataType::Int32),
701            Arc::new(non_null_field("field2", DataType::Int32)),
702        ];
703
704        let result = validate_struct_compatibility(&source_fields, &target_fields);
705        assert!(result.is_err());
706        let error_msg = result.unwrap_err().to_string();
707        assert_contains!(
708            error_msg,
709            "Cannot cast struct: target field 'field2' is non-nullable but missing from source. Cannot fill with NULL."
710        );
711    }
712
713    #[test]
714    fn test_validate_struct_compatibility_partial_name_overlap_with_count_mismatch() {
715        // Source struct: {a: Int32} (only one field)
716        let source_fields = vec![arc_field("a", DataType::Int32)];
717
718        // Target struct: {a: Int32, b: String} (two fields, but 'a' overlaps)
719        let target_fields = vec![
720            arc_field("a", DataType::Int32),
721            arc_field("b", DataType::Utf8),
722        ];
723
724        // This should succeed - partial overlap means by-name mapping
725        // and missing field 'b' is nullable
726        let result = validate_struct_compatibility(&source_fields, &target_fields);
727        assert!(result.is_ok());
728    }
729
#[test]
fn test_cast_nested_struct_with_extra_and_missing_fields() {
    // Source layout: outer { inner { a: Int32, b: Int32, extra: Int32 } }
    let a_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None]));
    let b_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(2), Some(3)]));
    let extra_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(9), Some(10)]));

    let source_inner: ArrayRef = Arc::new(StructArray::from(vec![
        (arc_field("a", DataType::Int32), a_values),
        (arc_field("b", DataType::Int32), b_values),
        (arc_field("extra", DataType::Int32), extra_values),
    ]));

    let source_inner_field = arc_struct_field(
        "inner",
        vec![
            field("a", DataType::Int32),
            field("b", DataType::Int32),
            field("extra", DataType::Int32),
        ],
    );
    let source_col: ArrayRef =
        Arc::new(StructArray::from(vec![(source_inner_field, source_inner)]));

    // Target layout: the inner struct lists `b` (widened to Int64) before `a`,
    // introduces a new `missing` field, and has no `extra` field.
    let target_inner = struct_field(
        "inner",
        vec![
            field("b", DataType::Int64),
            field("a", DataType::Int32),
            field("missing", DataType::Int32),
        ],
    );
    let target_field = struct_field("outer", vec![target_inner]);

    let casted = cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS).unwrap();
    let outer = casted.as_any().downcast_ref::<StructArray>().unwrap();
    let inner = get_column_as!(&outer, "inner", StructArray);
    assert_eq!(inner.fields().len(), 3);

    // `b` keeps both values and is read back as Int64
    let cast_b = get_column_as!(inner, "b", Int64Array);
    assert_eq!(cast_b.value(0), 2);
    assert_eq!(cast_b.value(1), 3);
    for row in 0..2 {
        assert!(!cast_b.is_null(row));
    }

    // `a` keeps its original value and its original null
    let cast_a = get_column_as!(inner, "a", Int32Array);
    assert_eq!(cast_a.value(0), 1);
    assert!(cast_a.is_null(1));

    // `missing` has no source counterpart, so every row is null
    let cast_missing = get_column_as!(inner, "missing", Int32Array);
    for row in 0..2 {
        assert!(cast_missing.is_null(row));
    }
}
789
#[test]
fn test_cast_null_struct_field_to_nested_struct() {
    // Source: a struct whose only child `inner` is a two-row Null-typed column
    let null_column: ArrayRef = Arc::new(NullArray::new(2));
    let source_col: ArrayRef = Arc::new(StructArray::from(vec![(
        arc_field("inner", DataType::Null),
        Arc::clone(&null_column),
    )]));

    // Target: the same child name, but typed as a struct { a: Int32 }
    let target_inner = struct_field("inner", vec![field("a", DataType::Int32)]);
    let target_field = struct_field("outer", vec![target_inner]);

    let casted = cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS).unwrap();
    let outer = casted.as_any().downcast_ref::<StructArray>().unwrap();

    // The nested struct itself must be null on both rows
    let inner = get_column_as!(&outer, "inner", StructArray);
    assert_eq!(inner.len(), 2);
    for row in 0..2 {
        assert!(inner.is_null(row));
    }

    // ... and so must its child column
    let inner_a = get_column_as!(inner, "a", Int32Array);
    for row in 0..2 {
        assert!(inner_a.is_null(row));
    }
}
816
#[test]
fn test_cast_struct_with_array_and_map_fields() {
    // Source and target use the very same list and map types, so build each once
    let list_type = DataType::List(Arc::new(field("item", DataType::Int32)));
    let map_type = DataType::Map(
        Arc::new(non_null_field(
            "entries",
            struct_type(vec![
                non_null_field("keys", DataType::Utf8),
                field("values", DataType::Int32),
            ]),
        )),
        false,
    );

    // List column: first row [1, 2], second row null
    let list_values: ArrayRef =
        Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
            Some(vec![Some(1), Some(2)]),
            None,
        ]));

    // Map column: first row {"a": 1}, second row null
    let mut map_builder =
        MapBuilder::new(None, StringBuilder::new(), Int32Builder::new());
    map_builder.keys().append_value("a");
    map_builder.values().append_value(1);
    map_builder.append(true).unwrap();
    map_builder.append(false).unwrap();
    let map_values: ArrayRef = Arc::new(map_builder.finish());

    let source_col: ArrayRef = Arc::new(StructArray::from(vec![
        (arc_field("arr", list_type.clone()), list_values),
        (arc_field("map", map_type.clone()), map_values),
    ]));

    let target_field =
        struct_field("s", vec![field("arr", list_type), field("map", map_type)]);

    let casted = cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS).unwrap();
    let struct_array = casted.as_any().downcast_ref::<StructArray>().unwrap();

    // The list column keeps its validity and its element values
    let arr = get_column_as!(&struct_array, "arr", ListArray);
    assert!(!arr.is_null(0));
    assert!(arr.is_null(1));
    let first_list = arr.value(0);
    let list_items = first_list.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(list_items.value(0), 1);
    assert_eq!(list_items.value(1), 2);

    // The map column keeps its validity and its single key/value entry
    let map = get_column_as!(&struct_array, "map", MapArray);
    assert!(!map.is_null(0));
    assert!(map.is_null(1));
    let first_map = map.value(0);
    let entries = first_map.as_any().downcast_ref::<StructArray>().unwrap();
    let keys = get_column_as!(entries, "keys", StringArray);
    let vals = get_column_as!(entries, "values", Int32Array);
    assert_eq!(keys.value(0), "a");
    assert_eq!(vals.value(0), 1);
}
907
#[test]
fn test_cast_struct_field_order_differs() {
    // Source declares its fields as (a, b), both Int32
    let a_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2)]));
    let b_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(3), None]));
    let source_col: ArrayRef = Arc::new(StructArray::from(vec![
        (arc_field("a", DataType::Int32), a_values),
        (arc_field("b", DataType::Int32), b_values),
    ]));

    // Target declares them as (b, a) and widens `b` to Int64
    let target_children = vec![field("b", DataType::Int64), field("a", DataType::Int32)];
    let target_field = struct_field("s", target_children);

    let casted = cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS).unwrap();
    let struct_array = casted.as_any().downcast_ref::<StructArray>().unwrap();

    // Each column, looked up by name, must carry its own source data
    let cast_b = get_column_as!(&struct_array, "b", Int64Array);
    assert_eq!(cast_b.value(0), 3);
    assert!(cast_b.is_null(1));

    let cast_a = get_column_as!(&struct_array, "a", Int32Array);
    assert_eq!(cast_a.value(0), 1);
    assert_eq!(cast_a.value(1), 2);
}
936
#[test]
fn test_cast_struct_no_overlap_rejected() {
    // Source fields are named `left` / `right` ...
    let left_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(10), Some(20)]));
    let right_values: ArrayRef =
        Arc::new(StringArray::from(vec![Some("alpha"), Some("beta")]));
    let source_col: ArrayRef = Arc::new(StructArray::from(vec![
        (arc_field("left", DataType::Int32), left_values),
        (arc_field("right", DataType::Utf8), right_values),
    ]));

    // ... while the target fields are named `a` / `b`: no name is shared
    let target_children = vec![field("a", DataType::Int64), field("b", DataType::Utf8)];
    let target_field = struct_field("s", target_children);

    // The cast must fail and the error must mention the lack of overlap
    let outcome = cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS);
    assert!(outcome.is_err());
    let error_msg = outcome.unwrap_err().to_string();
    assert_contains!(error_msg, "no field name overlap");
}
959
#[test]
fn test_cast_struct_missing_non_nullable_field_fails() {
    // Source struct carries a single field: `a`
    let a_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2)]));
    let source_col: ArrayRef = Arc::new(StructArray::from(vec![(
        arc_field("a", DataType::Int32),
        a_values,
    )]));

    // Target struct wants nullable `a` plus a non-nullable `b`
    let target_children = vec![
        field("a", DataType::Int32),
        non_null_field("b", DataType::Int32),
    ];
    let target_field = struct_field("s", target_children);

    // `b` is absent from the source and is declared non-nullable,
    // so the cast has to be rejected
    let outcome = cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS);
    assert!(outcome.is_err());
    let err = outcome.unwrap_err();
    let message = err.to_string();
    assert!(
        message.contains("target field 'b' is non-nullable but missing from source"),
        "Unexpected error: {err}"
    );
}
986
#[test]
fn test_cast_struct_missing_nullable_field_succeeds() {
    // Source struct carries a single field: `a`
    let a_values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2)]));
    let source_col: ArrayRef = Arc::new(StructArray::from(vec![(
        arc_field("a", DataType::Int32),
        a_values,
    )]));

    // Target struct wants `a` and `b`, both nullable
    let target_children = vec![field("a", DataType::Int32), field("b", DataType::Int32)];
    let target_field = struct_field("s", target_children);

    // The cast is expected to succeed
    let casted = cast_column(&source_col, &target_field, &DEFAULT_CAST_OPTIONS).unwrap();
    let struct_array = casted.as_any().downcast_ref::<StructArray>().unwrap();

    // `a` passes through with its values intact
    let cast_a = get_column_as!(&struct_array, "a", Int32Array);
    assert_eq!(cast_a.value(0), 1);
    assert_eq!(cast_a.value(1), 2);

    // `b` is null on every row
    let cast_b = get_column_as!(&struct_array, "b", Int32Array);
    for row in 0..2 {
        assert!(cast_b.is_null(row));
    }
}
1013}