datafusion_common/metadata.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::BTreeMap, sync::Arc};
19
20use arrow::datatypes::{DataType, Field, FieldRef};
21use hashbrown::HashMap;
22
23use crate::{DataFusionError, ScalarValue, error::_plan_err};
24
25/// A [`ScalarValue`] with optional [`FieldMetadata`]
26#[derive(Debug, Clone)]
27pub struct ScalarAndMetadata {
28 pub value: ScalarValue,
29 pub metadata: Option<FieldMetadata>,
30}
31
32impl ScalarAndMetadata {
33 /// Create a new Literal from a scalar value with optional [`FieldMetadata`]
34 pub fn new(value: ScalarValue, metadata: Option<FieldMetadata>) -> Self {
35 Self { value, metadata }
36 }
37
38 /// Access the underlying [ScalarValue] storage
39 pub fn value(&self) -> &ScalarValue {
40 &self.value
41 }
42
43 /// Access the [FieldMetadata] attached to this value, if any
44 pub fn metadata(&self) -> Option<&FieldMetadata> {
45 self.metadata.as_ref()
46 }
47
48 /// Consume self and return components
49 pub fn into_inner(self) -> (ScalarValue, Option<FieldMetadata>) {
50 (self.value, self.metadata)
51 }
52
53 /// Cast this values's storage type
54 ///
55 /// This operation assumes that if the underlying [ScalarValue] can be casted
56 /// to a given type that any extension type represented by the metadata is also
57 /// valid.
58 pub fn cast_storage_to(
59 &self,
60 target_type: &DataType,
61 ) -> Result<Self, DataFusionError> {
62 let new_value = self.value().cast_to(target_type)?;
63 Ok(Self::new(new_value, self.metadata.clone()))
64 }
65}
66
67/// create a new ScalarAndMetadata from a ScalarValue without
68/// any metadata
69impl From<ScalarValue> for ScalarAndMetadata {
70 fn from(value: ScalarValue) -> Self {
71 Self::new(value, None)
72 }
73}
74
75/// Assert equality of data types where one or both sides may have field metadata
76///
77/// This currently compares absent metadata (e.g., one side was a DataType) and
78/// empty metadata (e.g., one side was a field where the field had no metadata)
79/// as equal and uses byte-for-byte comparison for the keys and values of the
80/// fields, even though this is potentially too strict for some cases (e.g.,
81/// extension types where extension metadata is represented by JSON, or cases
82/// where field metadata is orthogonal to the interpretation of the data type).
83///
84/// Returns a planning error with suitably formatted type representations if
85/// actual and expected do not compare to equal.
86pub fn check_metadata_with_storage_equal(
87 actual: (
88 &DataType,
89 Option<&std::collections::HashMap<String, String>>,
90 ),
91 expected: (
92 &DataType,
93 Option<&std::collections::HashMap<String, String>>,
94 ),
95 what: &str,
96 context: &str,
97) -> Result<(), DataFusionError> {
98 if actual.0 != expected.0 {
99 return _plan_err!(
100 "Expected {what} of type {}, got {}{context}",
101 format_type_and_metadata(expected.0, expected.1),
102 format_type_and_metadata(actual.0, actual.1)
103 );
104 }
105
106 let metadata_equal = match (actual.1, expected.1) {
107 (None, None) => true,
108 (None, Some(expected_metadata)) => expected_metadata.is_empty(),
109 (Some(actual_metadata), None) => actual_metadata.is_empty(),
110 (Some(actual_metadata), Some(expected_metadata)) => {
111 actual_metadata == expected_metadata
112 }
113 };
114
115 if !metadata_equal {
116 return _plan_err!(
117 "Expected {what} of type {}, got {}{context}",
118 format_type_and_metadata(expected.0, expected.1),
119 format_type_and_metadata(actual.0, actual.1)
120 );
121 }
122
123 Ok(())
124}
125
126/// Given a data type represented by storage and optional metadata, generate
127/// a user-facing string
128///
129/// This function exists to reduce the number of Field debug strings that are
130/// used to communicate type information in error messages and plan explain
131/// renderings.
132pub fn format_type_and_metadata(
133 data_type: &DataType,
134 metadata: Option<&std::collections::HashMap<String, String>>,
135) -> String {
136 match metadata {
137 Some(metadata) if !metadata.is_empty() => {
138 format!("{data_type}<{metadata:?}>")
139 }
140 _ => data_type.to_string(),
141 }
142}
143
/// Metadata attached to a literal expression.
///
/// A `FieldMetadata` holds the string key-value pairs associated with a
/// literal expression and mirrors the `metadata` field on [`Field`]. The map is
/// kept behind an [`Arc`], so the type is designed to be fast to `clone`.
///
/// # Example: Create [`FieldMetadata`] from a [`Field`]
/// ```
/// # use std::collections::HashMap;
/// # use datafusion_common::metadata::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true)
/// #  .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())]));
/// // Create a new `FieldMetadata` instance from a `Field`
/// let metadata = FieldMetadata::new_from_field(&field);
/// // There is also a `From` impl:
/// let metadata = FieldMetadata::from(&field);
/// ```
///
/// # Example: Update a [`Field`] with [`FieldMetadata`]
/// ```
/// # use datafusion_common::metadata::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true);
/// # let metadata = FieldMetadata::new_from_field(&field);
/// // Add any metadata from `FieldMetadata` to `Field`
/// let updated_field = metadata.add_to_field(field);
/// ```
///
/// For more background, please also see the [Implementing User Defined Types and Custom Metadata in DataFusion blog]
///
/// [Implementing User Defined Types and Custom Metadata in DataFusion blog]: https://datafusion.apache.org/blog/2025/09/21/custom-types-using-metadata
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub struct FieldMetadata {
    /// The key-value pairs of the literal expression: string keys mapped to
    /// string values.
    ///
    /// A `BTreeMap` is used rather than a `HashMap` because `HashMap` does not
    /// implement traits such as `Hash` and `PartialOrd`, which this type
    /// derives.
    inner: Arc<BTreeMap<String, String>>,
}
187
188impl Default for FieldMetadata {
189 fn default() -> Self {
190 Self::new_empty()
191 }
192}
193
194impl FieldMetadata {
195 /// Create a new empty metadata instance.
196 pub fn new_empty() -> Self {
197 Self {
198 inner: Arc::new(BTreeMap::new()),
199 }
200 }
201
202 /// Merges two optional `FieldMetadata` instances, overwriting any existing
203 /// keys in `m` with keys from `n` if present.
204 ///
205 /// This function is commonly used in alias operations, particularly for literals
206 /// with metadata. When creating an alias expression, the metadata from the original
207 /// expression (such as a literal) is combined with any metadata specified on the alias.
208 ///
209 /// # Arguments
210 ///
211 /// * `m` - The first metadata (typically from the original expression like a literal)
212 /// * `n` - The second metadata (typically from the alias definition)
213 ///
214 /// # Merge Strategy
215 ///
216 /// - If both metadata instances exist, they are merged with `n` taking precedence
217 /// - Keys from `n` will overwrite keys from `m` if they have the same name
218 /// - If only one metadata instance exists, it is returned unchanged
219 /// - If neither exists, `None` is returned
220 ///
221 /// # Example usage
222 /// ```rust
223 /// use datafusion_common::metadata::FieldMetadata;
224 /// use std::collections::BTreeMap;
225 ///
226 /// // Create metadata for a literal expression
227 /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([
228 /// ("source".to_string(), "constant".to_string()),
229 /// ("type".to_string(), "int".to_string()),
230 /// ])));
231 ///
232 /// // Create metadata for an alias
233 /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([
234 /// ("description".to_string(), "answer".to_string()),
235 /// ("source".to_string(), "user".to_string()), // This will override literal's "source"
236 /// ])));
237 ///
238 /// // Merge the metadata
239 /// let merged = FieldMetadata::merge_options(
240 /// literal_metadata.as_ref(),
241 /// alias_metadata.as_ref(),
242 /// );
243 ///
244 /// // Result contains: {"source": "user", "type": "int", "description": "answer"}
245 /// assert!(merged.is_some());
246 /// ```
247 pub fn merge_options(
248 m: Option<&FieldMetadata>,
249 n: Option<&FieldMetadata>,
250 ) -> Option<FieldMetadata> {
251 match (m, n) {
252 (Some(m), Some(n)) => {
253 let mut merged = m.clone();
254 merged.extend(n.clone());
255 Some(merged)
256 }
257 (Some(m), None) => Some(m.clone()),
258 (None, Some(n)) => Some(n.clone()),
259 (None, None) => None,
260 }
261 }
262
263 /// Create a new metadata instance from a `Field`'s metadata.
264 pub fn new_from_field(field: &Field) -> Self {
265 let inner = field
266 .metadata()
267 .iter()
268 .map(|(k, v)| (k.to_string(), v.to_string()))
269 .collect();
270 Self {
271 inner: Arc::new(inner),
272 }
273 }
274
275 /// Create a new metadata instance from a map of string keys to string values.
276 pub fn new(inner: BTreeMap<String, String>) -> Self {
277 Self {
278 inner: Arc::new(inner),
279 }
280 }
281
282 /// Get the inner metadata as a reference to a `BTreeMap`.
283 pub fn inner(&self) -> &BTreeMap<String, String> {
284 &self.inner
285 }
286
287 /// Return the inner metadata
288 pub fn into_inner(self) -> Arc<BTreeMap<String, String>> {
289 self.inner
290 }
291
292 /// Adds metadata from `other` into `self`, overwriting any existing keys.
293 pub fn extend(&mut self, other: Self) {
294 if other.is_empty() {
295 return;
296 }
297 let other = Arc::unwrap_or_clone(other.into_inner());
298 Arc::make_mut(&mut self.inner).extend(other);
299 }
300
301 /// Returns true if the metadata is empty.
302 pub fn is_empty(&self) -> bool {
303 self.inner.is_empty()
304 }
305
306 /// Returns the number of key-value pairs in the metadata.
307 pub fn len(&self) -> usize {
308 self.inner.len()
309 }
310
311 /// Convert this `FieldMetadata` into a `HashMap<String, String>`
312 pub fn to_hashmap(&self) -> std::collections::HashMap<String, String> {
313 self.inner
314 .iter()
315 .map(|(k, v)| (k.to_string(), v.to_string()))
316 .collect()
317 }
318
319 /// Updates the metadata on the Field with this metadata, if it is not empty.
320 pub fn add_to_field(&self, field: Field) -> Field {
321 if self.inner.is_empty() {
322 return field;
323 }
324
325 field.with_metadata(self.to_hashmap())
326 }
327
328 /// Updates the metadata on the FieldRef with this metadata, if it is not empty.
329 pub fn add_to_field_ref(&self, mut field_ref: FieldRef) -> FieldRef {
330 if self.inner.is_empty() {
331 return field_ref;
332 }
333
334 Arc::make_mut(&mut field_ref).set_metadata(self.to_hashmap());
335 field_ref
336 }
337}
338
339impl From<&Field> for FieldMetadata {
340 fn from(field: &Field) -> Self {
341 Self::new_from_field(field)
342 }
343}
344
345impl From<BTreeMap<String, String>> for FieldMetadata {
346 fn from(inner: BTreeMap<String, String>) -> Self {
347 Self::new(inner)
348 }
349}
350
351impl From<std::collections::HashMap<String, String>> for FieldMetadata {
352 fn from(map: std::collections::HashMap<String, String>) -> Self {
353 Self::new(map.into_iter().collect())
354 }
355}
356
357/// From reference
358impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
359 fn from(map: &std::collections::HashMap<String, String>) -> Self {
360 let inner = map
361 .iter()
362 .map(|(k, v)| (k.to_string(), v.to_string()))
363 .collect();
364 Self::new(inner)
365 }
366}
367
368/// From hashbrown map
369impl From<HashMap<String, String>> for FieldMetadata {
370 fn from(map: HashMap<String, String>) -> Self {
371 let inner = map.into_iter().collect();
372 Self::new(inner)
373 }
374}
375
376impl From<&HashMap<String, String>> for FieldMetadata {
377 fn from(map: &HashMap<String, String>) -> Self {
378 let inner = map
379 .into_iter()
380 .map(|(k, v)| (k.to_string(), v.to_string()))
381 .collect();
382 Self::new(inner)
383 }
384}