Skip to main content

datafusion_functions/core/
arrow_field.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::array::{
19    Array, BooleanArray, MapBuilder, StringArray, StringBuilder, StructArray,
20};
21use arrow::datatypes::{DataType, Field, Fields};
22use datafusion_common::{Result, ScalarValue, utils::take_function_args};
23use datafusion_expr::{
24    ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
25    Volatility,
26};
27use datafusion_macros::user_doc;
28use std::sync::Arc;
29
30#[user_doc(
31    doc_section(label = "Other Functions"),
32    description = "Returns a struct containing the Arrow field information of the expression, including name, data type, nullability, and metadata.",
33    syntax_example = "arrow_field(expression)",
34    sql_example = r#"```sql
35> select arrow_field(1);
36+-------------------------------------------------------------+
37| arrow_field(Int64(1))                                       |
38+-------------------------------------------------------------+
39| {name: lit, data_type: Int64, nullable: false, metadata: {}} |
40+-------------------------------------------------------------+
41
42> select arrow_field(1)['data_type'];
43+-----------------------------------+
44| arrow_field(Int64(1))[data_type]  |
45+-----------------------------------+
46| Int64                             |
47+-----------------------------------+
48```"#,
49    argument(
50        name = "expression",
51        description = "Expression to evaluate. The expression can be a constant, column, or function, and any combination of operators."
52    )
53)]
54#[derive(Clone, Debug, PartialEq, Eq, Hash)]
55pub struct ArrowFieldFunc {
56    signature: Signature,
57}
58
59impl Default for ArrowFieldFunc {
60    fn default() -> Self {
61        Self::new()
62    }
63}
64
65impl ArrowFieldFunc {
66    pub fn new() -> Self {
67        Self {
68            signature: Signature::any(1, Volatility::Immutable),
69        }
70    }
71
72    fn return_struct_type() -> DataType {
73        DataType::Struct(Fields::from(vec![
74            Field::new("name", DataType::Utf8, false),
75            Field::new("data_type", DataType::Utf8, false),
76            Field::new("nullable", DataType::Boolean, false),
77            Field::new(
78                "metadata",
79                DataType::Map(
80                    Arc::new(Field::new(
81                        "entries",
82                        DataType::Struct(Fields::from(vec![
83                            Field::new("keys", DataType::Utf8, false),
84                            Field::new("values", DataType::Utf8, true),
85                        ])),
86                        false,
87                    )),
88                    false,
89                ),
90                false,
91            ),
92        ]))
93    }
94}
95
96impl ScalarUDFImpl for ArrowFieldFunc {
97    fn name(&self) -> &str {
98        "arrow_field"
99    }
100
101    fn signature(&self) -> &Signature {
102        &self.signature
103    }
104
105    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
106        Ok(Self::return_struct_type())
107    }
108
109    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
110        let return_type = args.return_type().clone();
111        let [field] = take_function_args(self.name(), args.arg_fields)?;
112
113        // Build the name array
114        let name_array =
115            Arc::new(StringArray::from(vec![field.name().as_str()])) as Arc<dyn Array>;
116
117        // Build the data_type array
118        let data_type_str = field.data_type().to_string();
119        let data_type_array =
120            Arc::new(StringArray::from(vec![data_type_str.as_str()])) as Arc<dyn Array>;
121
122        // Build the nullable array
123        let nullable_array =
124            Arc::new(BooleanArray::from(vec![field.is_nullable()])) as Arc<dyn Array>;
125
126        // Build the metadata map array (same pattern as arrow_metadata.rs)
127        let metadata = field.metadata();
128        let mut map_builder =
129            MapBuilder::new(None, StringBuilder::new(), StringBuilder::new());
130
131        let mut entries: Vec<_> = metadata.iter().collect();
132        entries.sort_by_key(|(k, _)| *k);
133
134        for (k, v) in entries {
135            map_builder.keys().append_value(k);
136            map_builder.values().append_value(v);
137        }
138        map_builder.append(true)?;
139
140        let metadata_array = Arc::new(map_builder.finish()) as Arc<dyn Array>;
141
142        // Build the struct
143        let DataType::Struct(fields) = return_type else {
144            unreachable!()
145        };
146
147        let struct_array = StructArray::new(
148            fields,
149            vec![name_array, data_type_array, nullable_array, metadata_array],
150            None,
151        );
152
153        Ok(ColumnarValue::Scalar(ScalarValue::try_from_array(
154            &struct_array,
155            0,
156        )?))
157    }
158
159    fn documentation(&self) -> Option<&Documentation> {
160        self.doc()
161    }
162}