Skip to main content

datafusion_functions_nested/
map.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::collections::VecDeque;
19use std::hash::Hash;
20use std::sync::Arc;
21
22use arrow::array::{
23    Array, ArrayData, ArrayRef, ArrowPrimitiveType, MapArray, OffsetSizeTrait,
24    StructArray, cast::AsArray,
25};
26use arrow::buffer::Buffer;
27use arrow::datatypes::{
28    DataType, Date32Type, Date64Type, Field, Int8Type, Int16Type, Int32Type, Int64Type,
29    SchemaBuilder, ToByteSlice, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
30};
31
32use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays};
33use datafusion_common::{
34    HashSet, Result, ScalarValue, exec_err, utils::take_function_args,
35};
36use datafusion_expr::expr::ScalarFunction;
37use datafusion_expr::{
38    ColumnarValue, Documentation, Expr, ScalarFunctionArgs, ScalarUDFImpl, Signature,
39    Volatility,
40};
41use datafusion_macros::user_doc;
42
43use crate::make_array::make_array;
44
45/// Returns a map created from a key list and a value list
46pub fn map(keys: Vec<Expr>, values: Vec<Expr>) -> Expr {
47    let keys = make_array(keys);
48    let values = make_array(values);
49    Expr::ScalarFunction(ScalarFunction::new_udf(map_udf(), vec![keys, values]))
50}
51
52create_func!(MapFunc, map_udf);
53
54/// Check if we can evaluate the expr to constant directly.
55///
56/// # Example
57/// ```sql
58/// SELECT make_map('type', 'test') from test
59/// ```
60/// We can evaluate the result of `make_map` directly.
61fn can_evaluate_to_const(args: &[ColumnarValue]) -> bool {
62    args.iter()
63        .all(|arg| matches!(arg, ColumnarValue::Scalar(_)))
64}
65
66fn make_map_batch(args: &[ColumnarValue]) -> Result<ColumnarValue> {
67    let [keys_arg, values_arg] = take_function_args("make_map", args)?;
68
69    let can_evaluate_to_const = can_evaluate_to_const(args);
70
71    let keys = get_first_array_ref(keys_arg)?;
72    let key_array = keys.as_ref();
73
74    match keys_arg {
75        ColumnarValue::Array(_) => match key_array.data_type() {
76            DataType::List(_) => keys
77                .as_list::<i32>()
78                .iter()
79                .flatten()
80                .try_for_each(|row| validate_map_keys(row.as_ref()))?,
81            DataType::LargeList(_) => keys
82                .as_list::<i64>()
83                .iter()
84                .flatten()
85                .try_for_each(|row| validate_map_keys(row.as_ref()))?,
86            DataType::FixedSizeList(_, _) => {
87                keys.as_fixed_size_list()
88                    .iter()
89                    .flatten()
90                    .try_for_each(|row| validate_map_keys(row.as_ref()))?
91            }
92            data_type => {
93                return exec_err!(
94                    "Expected list, large_list or fixed_size_list, got {:?}",
95                    data_type
96                );
97            }
98        },
99        ColumnarValue::Scalar(_) => {
100            validate_map_keys(key_array)?;
101        }
102    }
103
104    let values = get_first_array_ref(values_arg)?;
105
106    make_map_batch_internal(&keys, &values, can_evaluate_to_const, &keys_arg.data_type())
107}
108
109fn validate_unique_primitive_keys<T: ArrowPrimitiveType>(array: &dyn Array) -> Result<()>
110where
111    T::Native: Copy + Eq + Hash + std::fmt::Display,
112{
113    let primitive_array = array.as_primitive::<T>();
114    if primitive_array.null_count() > 0 {
115        return exec_err!("map key cannot be null");
116    }
117
118    if let Some(value) = find_duplicate_value(
119        primitive_array.len(),
120        primitive_array.values().iter().copied(),
121    ) {
122        return exec_err!("map key must be unique, duplicate key found: {}", value);
123    }
124
125    Ok(())
126}
127
128fn validate_unique_str_keys<'a>(
129    null_count: usize,
130    len: usize,
131    values: impl IntoIterator<Item = &'a str>,
132) -> Result<()> {
133    if null_count > 0 {
134        return exec_err!("map key cannot be null");
135    }
136
137    if let Some(value) = find_duplicate_value(len, values) {
138        return exec_err!("map key must be unique, duplicate key found: {}", value);
139    }
140
141    Ok(())
142}
143
144fn validate_unique_binary_keys<'a>(
145    null_count: usize,
146    len: usize,
147    values: impl IntoIterator<Item = &'a [u8]>,
148) -> Result<()> {
149    if null_count > 0 {
150        return exec_err!("map key cannot be null");
151    }
152
153    if let Some(value) = find_duplicate_value(len, values) {
154        return exec_err!("map key must be unique, duplicate key found: {:?}", value);
155    }
156
157    Ok(())
158}
159
/// Returns the first value that has already been yielded earlier by `values`,
/// or `None` when all values are distinct.
///
/// `len` is only used to pre-size the set of values seen so far.
fn find_duplicate_value<T, I>(len: usize, values: I) -> Option<T>
where
    T: Copy + Eq + Hash,
    I: IntoIterator<Item = T>,
{
    let mut already_seen = HashSet::with_capacity(len);
    for candidate in values {
        // `insert` reports `false` when the set already contains the value.
        if !already_seen.insert(candidate) {
            return Some(candidate);
        }
    }
    None
}
168
169fn validate_unique_keys_generic(array: &dyn Array) -> Result<()> {
170    let mut seen_keys = HashSet::with_capacity(array.len());
171
172    for i in 0..array.len() {
173        let key = ScalarValue::try_from_array(array, i)?;
174
175        // Validation 1: Map keys cannot be null
176        if key.is_null() {
177            return exec_err!("map key cannot be null");
178        }
179
180        // Validation 2: Map keys must be unique
181        if seen_keys.contains(&key) {
182            return exec_err!("map key must be unique, duplicate key found: {}", key);
183        }
184        seen_keys.insert(key);
185    }
186    Ok(())
187}
188
189/// Validates that map keys are non-null and unique.
190fn validate_map_keys(array: &dyn Array) -> Result<()> {
191    match array.data_type() {
192        DataType::Int8 => validate_unique_primitive_keys::<Int8Type>(array),
193        DataType::Int16 => validate_unique_primitive_keys::<Int16Type>(array),
194        DataType::Int32 => validate_unique_primitive_keys::<Int32Type>(array),
195        DataType::Int64 => validate_unique_primitive_keys::<Int64Type>(array),
196        DataType::UInt8 => validate_unique_primitive_keys::<UInt8Type>(array),
197        DataType::UInt16 => validate_unique_primitive_keys::<UInt16Type>(array),
198        DataType::UInt32 => validate_unique_primitive_keys::<UInt32Type>(array),
199        DataType::UInt64 => validate_unique_primitive_keys::<UInt64Type>(array),
200        DataType::Date32 => validate_unique_primitive_keys::<Date32Type>(array),
201        DataType::Date64 => validate_unique_primitive_keys::<Date64Type>(array),
202        DataType::Utf8 => {
203            let arr = array.as_string::<i32>();
204            validate_unique_str_keys(arr.null_count(), arr.len(), arr.iter().flatten())
205        }
206        DataType::LargeUtf8 => {
207            let arr = array.as_string::<i64>();
208            validate_unique_str_keys(arr.null_count(), arr.len(), arr.iter().flatten())
209        }
210        DataType::Utf8View => {
211            let arr = array.as_string_view();
212            validate_unique_str_keys(arr.null_count(), arr.len(), arr.iter().flatten())
213        }
214        DataType::Binary => {
215            let arr = array.as_binary::<i32>();
216            validate_unique_binary_keys(arr.null_count(), arr.len(), arr.iter().flatten())
217        }
218        DataType::LargeBinary => {
219            let arr = array.as_binary::<i64>();
220            validate_unique_binary_keys(arr.null_count(), arr.len(), arr.iter().flatten())
221        }
222        DataType::BinaryView => {
223            let arr = array.as_binary_view();
224            validate_unique_binary_keys(arr.null_count(), arr.len(), arr.iter().flatten())
225        }
226        _ => validate_unique_keys_generic(array),
227    }
228}
229
230fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result<ArrayRef> {
231    match columnar_value {
232        ColumnarValue::Scalar(value) => match value {
233            ScalarValue::List(array) => Ok(array.value(0)),
234            ScalarValue::LargeList(array) => Ok(array.value(0)),
235            ScalarValue::FixedSizeList(array) => Ok(array.value(0)),
236            _ => exec_err!("Expected array, got {}", value),
237        },
238        ColumnarValue::Array(array) => Ok(array.to_owned()),
239    }
240}
241
242fn make_map_batch_internal(
243    keys: &ArrayRef,
244    values: &ArrayRef,
245    can_evaluate_to_const: bool,
246    data_type: &DataType,
247) -> Result<ColumnarValue> {
248    if keys.len() != values.len() {
249        return exec_err!("map requires key and value lists to have the same length");
250    }
251
252    // Use the array path (make_map_array_internal) in these cases:
253    // 1. Not const evaluation (!can_evaluate_to_const) - allows scalar elimination optimization
254    // 2. NULL maps present (keys.null_count() > 0) - fast path doesn't handle NULL list elements
255    if !can_evaluate_to_const || keys.null_count() > 0 {
256        return match data_type {
257            DataType::LargeList(..) => make_map_array_internal::<i64>(keys, values),
258            DataType::List(..) => make_map_array_internal::<i32>(keys, values),
259            DataType::FixedSizeList(..) => {
260                // FixedSizeList doesn't use OffsetSizeTrait, so handle it separately
261                make_map_array_from_fixed_size_list(keys, values)
262            }
263            _ => exec_err!(
264                "Expected List, LargeList, or FixedSizeList, got {:?}",
265                data_type
266            ),
267        };
268    }
269
270    let key_field = Arc::new(Field::new("key", keys.data_type().clone(), false));
271    let value_field = Arc::new(Field::new("value", values.data_type().clone(), true));
272    let mut entry_struct_buffer: VecDeque<(Arc<Field>, ArrayRef)> = VecDeque::new();
273    let mut entry_offsets_buffer = VecDeque::new();
274    entry_offsets_buffer.push_back(0);
275
276    entry_struct_buffer.push_back((Arc::clone(&key_field), Arc::clone(keys)));
277    entry_struct_buffer.push_back((Arc::clone(&value_field), Arc::clone(values)));
278    entry_offsets_buffer.push_back(keys.len() as u32);
279
280    let entry_struct: Vec<(Arc<Field>, ArrayRef)> = entry_struct_buffer.into();
281    let entry_struct = StructArray::from(entry_struct);
282
283    let map_data_type = DataType::Map(
284        Arc::new(Field::new(
285            "entries",
286            entry_struct.data_type().clone(),
287            false,
288        )),
289        false,
290    );
291
292    let entry_offsets: Vec<u32> = entry_offsets_buffer.into();
293    let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice());
294
295    let map_data = ArrayData::builder(map_data_type)
296        .len(entry_offsets.len() - 1)
297        .add_buffer(entry_offsets_buffer)
298        .add_child_data(entry_struct.to_data())
299        .build()?;
300    let map_array = Arc::new(MapArray::from(map_data));
301
302    Ok(if can_evaluate_to_const {
303        ColumnarValue::Scalar(ScalarValue::try_from_array(map_array.as_ref(), 0)?)
304    } else {
305        ColumnarValue::Array(map_array)
306    })
307}
308
309#[user_doc(
310    doc_section(label = "Map Functions"),
311    description = "Returns an Arrow map with the specified key-value pairs.\n\n\
312    The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null.",
313    syntax_example = "map(key, value)\nmap(key: value)\nmake_map(['key1', 'key2'], ['value1', 'value2'])",
314    sql_example = r#"
315```sql
316-- Using map function
317SELECT MAP('type', 'test');
318----
319{type: test}
320
321SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]);
322----
323{POST: 41, HEAD: 33, PATCH: NULL}
324
325SELECT MAP([[1,2], [3,4]], ['a', 'b']);
326----
327{[1, 2]: a, [3, 4]: b}
328
329SELECT MAP { 'a': 1, 'b': 2 };
330----
331{a: 1, b: 2}
332
333-- Using make_map function
334SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]);
335----
336{POST: 41, HEAD: 33}
337
338SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]);
339----
340{key1: value1, key2: }
341```"#,
342    argument(
343        name = "key",
344        description = "For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\
345                        For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null."
346    ),
347    argument(
348        name = "value",
349        description = "For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators.\n\
350                        For `make_map`: The list of values to be mapped to the corresponding keys."
351    )
352)]
353#[derive(Debug, PartialEq, Eq, Hash)]
354pub struct MapFunc {
355    signature: Signature,
356}
357
358impl Default for MapFunc {
359    fn default() -> Self {
360        Self::new()
361    }
362}
363
364impl MapFunc {
365    pub fn new() -> Self {
366        Self {
367            signature: Signature::variadic_any(Volatility::Immutable),
368        }
369    }
370}
371
372impl ScalarUDFImpl for MapFunc {
373    fn name(&self) -> &str {
374        "map"
375    }
376
377    fn signature(&self) -> &Signature {
378        &self.signature
379    }
380
381    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
382        let [keys_arg, values_arg] = take_function_args(self.name(), arg_types)?;
383        let mut builder = SchemaBuilder::new();
384        builder.push(Field::new(
385            "key",
386            get_element_type(keys_arg)?.clone(),
387            false,
388        ));
389        builder.push(Field::new(
390            "value",
391            get_element_type(values_arg)?.clone(),
392            true,
393        ));
394        let fields = builder.finish().fields;
395        Ok(DataType::Map(
396            Arc::new(Field::new("entries", DataType::Struct(fields), false)),
397            false,
398        ))
399    }
400
401    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
402        make_map_batch(&args.args)
403    }
404
405    fn documentation(&self) -> Option<&Documentation> {
406        self.doc()
407    }
408}
409
410fn get_element_type(data_type: &DataType) -> Result<&DataType> {
411    match data_type {
412        DataType::List(element) => Ok(element.data_type()),
413        DataType::LargeList(element) => Ok(element.data_type()),
414        DataType::FixedSizeList(element, _) => Ok(element.data_type()),
415        _ => exec_err!(
416            "Expected list, large_list or fixed_size_list, got {:?}",
417            data_type
418        ),
419    }
420}
421
422/// Helper function to create MapArray from array of values to support arrays for Map scalar function
423///
424/// ``` text
425/// Format of input KEYS and VALUES column
426///         keys                        values
427/// +---------------------+       +---------------------+
428/// | +-----------------+ |       | +-----------------+ |
429/// | | [k11, k12, k13] | |       | | [v11, v12, v13] | |
430/// | +-----------------+ |       | +-----------------+ |
431/// |                     |       |                     |
432/// | +-----------------+ |       | +-----------------+ |
433/// | | [k21, k22, k23] | |       | | [v21, v22, v23] | |
434/// | +-----------------+ |       | +-----------------+ |
435/// |                     |       |                     |
436/// | +-----------------+ |       | +-----------------+ |
437/// | |[k31, k32, k33]  | |       | |[v31, v32, v33]  | |
438/// | +-----------------+ |       | +-----------------+ |
439/// +---------------------+       +---------------------+
440/// ```
441/// Flattened keys and values array to user create `StructArray`,
442/// which serves as inner child for `MapArray`
443///
444/// ``` text
445/// Flattened           Flattened
446/// Keys                Values
447/// +-----------+      +-----------+
448/// | +-------+ |      | +-------+ |
449/// | |  k11  | |      | |  v11  | |
450/// | +-------+ |      | +-------+ |
451/// | +-------+ |      | +-------+ |
452/// | |  k12  | |      | |  v12  | |
453/// | +-------+ |      | +-------+ |
454/// | +-------+ |      | +-------+ |
455/// | |  k13  | |      | |  v13  | |
456/// | +-------+ |      | +-------+ |
457/// | +-------+ |      | +-------+ |
458/// | |  k21  | |      | |  v21  | |
459/// | +-------+ |      | +-------+ |
460/// | +-------+ |      | +-------+ |
461/// | |  k22  | |      | |  v22  | |
462/// | +-------+ |      | +-------+ |
463/// | +-------+ |      | +-------+ |
464/// | |  k23  | |      | |  v23  | |
465/// | +-------+ |      | +-------+ |
466/// | +-------+ |      | +-------+ |
467/// | |  k31  | |      | |  v31  | |
468/// | +-------+ |      | +-------+ |
469/// | +-------+ |      | +-------+ |
470/// | |  k32  | |      | |  v32  | |
471/// | +-------+ |      | +-------+ |
472/// | +-------+ |      | +-------+ |
473/// | |  k33  | |      | |  v33  | |
474/// | +-------+ |      | +-------+ |
475/// +-----------+      +-----------+
476/// ```text
477fn make_map_array_internal<O: OffsetSizeTrait>(
478    keys: &ArrayRef,
479    values: &ArrayRef,
480) -> Result<ColumnarValue> {
481    // Save original data types and array length before list_to_arrays transforms them
482    let keys_data_type = keys.data_type().clone();
483    let values_data_type = values.data_type().clone();
484    let original_len = keys.len(); // This is the number of rows in the input
485
486    // Save the nulls bitmap from the original keys array (before list_to_arrays)
487    // This tells us which MAP values are NULL (not which keys within maps are null)
488    let nulls_bitmap = keys.nulls().cloned();
489
490    let keys = list_to_arrays::<O>(keys);
491    let values = list_to_arrays_skipping_null_rows::<O>(values, nulls_bitmap.as_ref());
492
493    build_map_array(
494        &keys,
495        &values,
496        &keys_data_type,
497        &values_data_type,
498        original_len,
499        nulls_bitmap,
500    )
501}
502
503/// Helper function specifically for FixedSizeList inputs
504/// Similar to make_map_array_internal but uses fixed_size_list_to_arrays instead of list_to_arrays
505fn make_map_array_from_fixed_size_list(
506    keys: &ArrayRef,
507    values: &ArrayRef,
508) -> Result<ColumnarValue> {
509    // Save original data types and array length
510    let keys_data_type = keys.data_type().clone();
511    let values_data_type = values.data_type().clone();
512    let original_len = keys.len();
513
514    // Save the nulls bitmap from the original keys array
515    let nulls_bitmap = keys.nulls().cloned();
516
517    let keys = fixed_size_list_to_arrays(keys);
518    let values =
519        fixed_size_list_to_arrays_skipping_null_rows(values, nulls_bitmap.as_ref());
520
521    build_map_array(
522        &keys,
523        &values,
524        &keys_data_type,
525        &values_data_type,
526        original_len,
527        nulls_bitmap,
528    )
529}
530fn list_to_arrays_skipping_null_rows<O: OffsetSizeTrait>(
531    array: &ArrayRef,
532    null_rows: Option<&arrow::buffer::NullBuffer>,
533) -> Vec<ArrayRef> {
534    array
535        .as_list::<O>()
536        .iter()
537        .enumerate()
538        .filter_map(|(i, row)| {
539            if null_rows.is_some_and(|nulls| nulls.is_null(i)) {
540                None
541            } else {
542                row
543            }
544        })
545        .collect()
546}
547
548fn fixed_size_list_to_arrays_skipping_null_rows(
549    array: &ArrayRef,
550    null_rows: Option<&arrow::buffer::NullBuffer>,
551) -> Vec<ArrayRef> {
552    array
553        .as_fixed_size_list()
554        .iter()
555        .enumerate()
556        .filter_map(|(i, row)| {
557            if null_rows.is_some_and(|nulls| nulls.is_null(i)) {
558                None
559            } else {
560                row
561            }
562        })
563        .collect()
564}
565
566/// Common logic to build a MapArray from decomposed list arrays
567fn build_map_array(
568    keys: &[ArrayRef],
569    values: &[ArrayRef],
570    keys_data_type: &DataType,
571    values_data_type: &DataType,
572    original_len: usize,
573    nulls_bitmap: Option<arrow::buffer::NullBuffer>,
574) -> Result<ColumnarValue> {
575    if keys.len() != values.len() {
576        return exec_err!("map requires key and value lists to have the same length");
577    }
578
579    let mut key_array_vec = vec![];
580    let mut value_array_vec = vec![];
581    for (k, v) in keys.iter().zip(values.iter()) {
582        key_array_vec.push(k.as_ref());
583        value_array_vec.push(v.as_ref());
584    }
585
586    // Build offset buffer that accounts for NULL maps
587    // For each row, if it's NULL, the offset stays the same (empty range)
588    // If it's not NULL, the offset advances by the number of entries in that map
589    // NOTE: MapArray always requires i32 offsets, regardless of input list type
590    let mut running_offset = 0i32;
591    let mut offset_buffer = vec![running_offset];
592    let mut non_null_idx = 0;
593    for i in 0..original_len {
594        let is_null = nulls_bitmap.as_ref().is_some_and(|nulls| nulls.is_null(i));
595        if !is_null {
596            let entry_count = keys[non_null_idx].len();
597            // Validate that we won't overflow i32 when converting from potentially i64 offsets
598            let entry_count_i32 = i32::try_from(entry_count).map_err(|_| {
599                datafusion_common::DataFusionError::Execution(format!(
600                    "Map offset overflow: entry count {entry_count} at index {i} exceeds i32::MAX",
601                ))
602            })?;
603            running_offset =
604                running_offset.checked_add(entry_count_i32).ok_or_else(|| {
605                    datafusion_common::DataFusionError::Execution(format!(
606                    "Map offset overflow: cumulative offset exceeds i32::MAX at index {i}",
607                ))
608                })?;
609            non_null_idx += 1;
610        }
611        offset_buffer.push(running_offset);
612    }
613
614    // concatenate all the arrays
615    // If key_array_vec is empty, it means all maps were NULL (list elements were NULL).
616    // In this case, we need to create empty arrays with the correct data type.
617    let (flattened_keys, flattened_values) = if key_array_vec.is_empty() {
618        // All maps are NULL - create empty arrays
619        // We need to infer the data type from the original keys/values arrays
620        let key_type = get_element_type(keys_data_type)?;
621        let value_type = get_element_type(values_data_type)?;
622
623        (
624            arrow::array::new_empty_array(key_type),
625            arrow::array::new_empty_array(value_type),
626        )
627    } else {
628        let flattened_keys = arrow::compute::concat(key_array_vec.as_ref())?;
629        if flattened_keys.null_count() > 0 {
630            return exec_err!("keys cannot be null");
631        }
632        let flattened_values = arrow::compute::concat(value_array_vec.as_ref())?;
633        (flattened_keys, flattened_values)
634    };
635
636    let fields = vec![
637        Arc::new(Field::new("key", flattened_keys.data_type().clone(), false)),
638        Arc::new(Field::new(
639            "value",
640            flattened_values.data_type().clone(),
641            true,
642        )),
643    ];
644
645    let struct_data = ArrayData::builder(DataType::Struct(fields.into()))
646        .len(flattened_keys.len())
647        .add_child_data(flattened_keys.to_data())
648        .add_child_data(flattened_values.to_data())
649        .build()?;
650
651    let mut map_data_builder = ArrayData::builder(DataType::Map(
652        Arc::new(Field::new(
653            "entries",
654            struct_data.data_type().clone(),
655            false,
656        )),
657        false,
658    ))
659    .len(original_len) // Use the original number of rows, not the filtered count
660    .add_child_data(struct_data)
661    .add_buffer(Buffer::from_slice_ref(offset_buffer.as_slice()));
662
663    // Add the nulls bitmap if present (to preserve NULL map values)
664    if let Some(nulls) = nulls_bitmap {
665        map_data_builder = map_data_builder.nulls(Some(nulls));
666    }
667
668    let map_data = map_data_builder.build()?;
669    Ok(ColumnarValue::Array(Arc::new(MapArray::from(map_data))))
670}
671
#[cfg(test)]
mod tests {
    use super::*;

    /// Unwraps an array result, panicking on a scalar.
    fn expect_array(result: Result<ColumnarValue>) -> ArrayRef {
        match result.unwrap() {
            ColumnarValue::Array(arr) => arr,
            _ => panic!("Expected Array result"),
        }
    }

    #[test]
    fn test_make_map_with_null_maps() {
        // NULL map values (the entire map is NULL) must be accepted. The two
        // cases below must be told apart:
        // - NULL map (entire map is NULL) - allowed
        // - null key within a map - rejected

        // Build keys array: [['a'], NULL, ['b']]
        // The middle NULL represents an entire NULL map, not a null key
        let mut key_builder =
            arrow::array::ListBuilder::new(arrow::array::StringBuilder::new());

        // First map: ['a']
        key_builder.values().append_value("a");
        key_builder.append(true);

        // Second map: NULL (entire map is NULL)
        key_builder.append(false);

        // Third map: ['b']
        key_builder.values().append_value("b");
        key_builder.append(true);

        let keys_array = Arc::new(key_builder.finish());

        // Build values array: [[1], [2], [3]]
        let mut value_builder =
            arrow::array::ListBuilder::new(arrow::array::Int32Builder::new());

        value_builder.values().append_value(1);
        value_builder.append(true);

        value_builder.values().append_value(2);
        value_builder.append(true);

        value_builder.values().append_value(3);
        value_builder.append(true);

        let values_array = Arc::new(value_builder.finish());

        // Call make_map_batch - should succeed
        let result = make_map_batch(&[
            ColumnarValue::Array(keys_array),
            ColumnarValue::Array(values_array),
        ]);

        assert!(result.is_ok(), "Should handle NULL maps correctly");

        // Verify the result
        let map_array = expect_array(result);

        assert_eq!(map_array.len(), 3, "Should have 3 maps");
        assert!(!map_array.is_null(0), "First map should not be NULL");
        assert!(map_array.is_null(1), "Second map should be NULL");
        assert!(!map_array.is_null(2), "Third map should not be NULL");

        // The NULL map occupies an empty offset range and its value row (2)
        // must not leak into the entries.
        let map = map_array.as_map();
        assert_eq!(map.value_offsets().to_vec(), vec![0, 1, 1, 2]);
        assert_eq!(
            map.values().as_primitive::<Int32Type>().values().to_vec(),
            vec![1, 3],
            "Value row under the NULL key row should be skipped"
        );
    }

    #[test]
    fn test_make_map_with_null_key_within_map_should_fail() {
        // Null keys WITHIN a map must still be rejected, even though NULL
        // maps are allowed.

        // Build keys array: [['a', NULL, 'b']]
        // The NULL here is a null key within the map, which is invalid
        let mut key_builder =
            arrow::array::ListBuilder::new(arrow::array::StringBuilder::new());

        key_builder.values().append_value("a");
        key_builder.values().append_null(); // Invalid: null key
        key_builder.values().append_value("b");
        key_builder.append(true);

        let keys_array = Arc::new(key_builder.finish());

        // Build values array: [[1, 2, 3]]
        let mut value_builder =
            arrow::array::ListBuilder::new(arrow::array::Int32Builder::new());

        value_builder.values().append_value(1);
        value_builder.values().append_value(2);
        value_builder.values().append_value(3);
        value_builder.append(true);

        let values_array = Arc::new(value_builder.finish());

        // Call make_map_batch - should fail
        let result = make_map_batch(&[
            ColumnarValue::Array(keys_array),
            ColumnarValue::Array(values_array),
        ]);

        assert!(result.is_err(), "Should reject null keys within maps");

        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("cannot be null"),
            "Error should mention null keys, got: {err_msg}"
        );
    }

    #[test]
    fn test_make_map_with_large_list() {
        // LargeList inputs use i64 offsets, while the resulting MapArray
        // always uses i32 offsets.

        // Build keys array as LargeList: [['a', 'b'], ['c']]
        let mut key_builder =
            arrow::array::LargeListBuilder::new(arrow::array::StringBuilder::new());

        // First map: ['a', 'b']
        key_builder.values().append_value("a");
        key_builder.values().append_value("b");
        key_builder.append(true);

        // Second map: ['c']
        key_builder.values().append_value("c");
        key_builder.append(true);

        let keys_array = Arc::new(key_builder.finish());

        // Build values array as LargeList: [[1, 2], [3]]
        let mut value_builder =
            arrow::array::LargeListBuilder::new(arrow::array::Int32Builder::new());

        value_builder.values().append_value(1);
        value_builder.values().append_value(2);
        value_builder.append(true);

        value_builder.values().append_value(3);
        value_builder.append(true);

        let values_array = Arc::new(value_builder.finish());

        // Call make_map_batch - should succeed
        let result = make_map_batch(&[
            ColumnarValue::Array(keys_array),
            ColumnarValue::Array(values_array),
        ]);

        assert!(
            result.is_ok(),
            "Should handle LargeList inputs correctly: {:?}",
            result.err()
        );

        // Verify the result
        let map_array = expect_array(result);

        assert_eq!(map_array.len(), 2, "Should have 2 maps");
        assert!(!map_array.is_null(0), "First map should not be NULL");
        assert!(!map_array.is_null(1), "Second map should not be NULL");

        // Offsets must describe two maps with 2 and 1 entries respectively.
        let map = map_array.as_map();
        assert_eq!(map.value_offsets().to_vec(), vec![0, 2, 3]);
        assert_eq!(
            map.values().as_primitive::<Int32Type>().values().to_vec(),
            vec![1, 2, 3]
        );
    }

    #[test]
    fn test_make_map_with_fixed_size_list() {
        // FixedSizeList inputs, including a NULL row, must be supported.

        use arrow::array::FixedSizeListBuilder;

        // Build keys array as FixedSizeList(2): [['a', 'b'], NULL, ['c', 'd']]
        let key_values_builder = arrow::array::StringBuilder::new();
        let mut key_builder = FixedSizeListBuilder::new(key_values_builder, 2);

        // First map: ['a', 'b']
        key_builder.values().append_value("a");
        key_builder.values().append_value("b");
        key_builder.append(true);

        // Second map: NULL (entire map is NULL)
        key_builder.values().append_null();
        key_builder.values().append_null();
        key_builder.append(false);

        // Third map: ['c', 'd']
        key_builder.values().append_value("c");
        key_builder.values().append_value("d");
        key_builder.append(true);

        let keys_array = Arc::new(key_builder.finish());

        // Build values array as FixedSizeList(2): [[1, 2], [99, 100], [3, 4]]
        // The middle row should be ignored because the corresponding key row is NULL.
        let value_values_builder = arrow::array::Int32Builder::new();
        let mut value_builder = FixedSizeListBuilder::new(value_values_builder, 2);

        value_builder.values().append_value(1);
        value_builder.values().append_value(2);
        value_builder.append(true);

        value_builder.values().append_value(99);
        value_builder.values().append_value(100);
        value_builder.append(true);

        value_builder.values().append_value(3);
        value_builder.values().append_value(4);
        value_builder.append(true);

        let values_array = Arc::new(value_builder.finish());

        // Call make_map_batch - should succeed
        let result = make_map_batch(&[
            ColumnarValue::Array(keys_array),
            ColumnarValue::Array(values_array),
        ]);

        assert!(
            result.is_ok(),
            "Should handle FixedSizeList inputs correctly: {:?}",
            result.err()
        );

        // Verify the result
        let map_array = expect_array(result);

        assert_eq!(map_array.len(), 3, "Should have 3 maps");
        assert!(!map_array.is_null(0), "First map should not be NULL");
        assert!(map_array.is_null(1), "Second map should be NULL");
        assert!(!map_array.is_null(2), "Third map should not be NULL");

        // The values under the NULL key row (99, 100) must have been dropped.
        let map = map_array.as_map();
        assert_eq!(map.value_offsets().to_vec(), vec![0, 2, 2, 4]);
        assert_eq!(
            map.values().as_primitive::<Int32Type>().values().to_vec(),
            vec![1, 2, 3, 4],
            "Value row under the NULL key row should be skipped"
        );
    }
}