Skip to main content

datafusion_common/
metadata.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::BTreeMap, sync::Arc};
19
20use arrow::datatypes::{DataType, Field, FieldRef};
21use hashbrown::HashMap;
22
23use crate::{DataFusionError, ScalarValue, error::_plan_err};
24
25/// A [`ScalarValue`] with optional [`FieldMetadata`]
26#[derive(Debug, Clone)]
27pub struct ScalarAndMetadata {
28    pub value: ScalarValue,
29    pub metadata: Option<FieldMetadata>,
30}
31
32impl ScalarAndMetadata {
33    /// Create a new Literal from a scalar value with optional [`FieldMetadata`]
34    pub fn new(value: ScalarValue, metadata: Option<FieldMetadata>) -> Self {
35        Self { value, metadata }
36    }
37
38    /// Access the underlying [ScalarValue] storage
39    pub fn value(&self) -> &ScalarValue {
40        &self.value
41    }
42
43    /// Access the [FieldMetadata] attached to this value, if any
44    pub fn metadata(&self) -> Option<&FieldMetadata> {
45        self.metadata.as_ref()
46    }
47
48    /// Consume self and return components
49    pub fn into_inner(self) -> (ScalarValue, Option<FieldMetadata>) {
50        (self.value, self.metadata)
51    }
52
53    /// Cast this values's storage type
54    ///
55    /// This operation assumes that if the underlying [ScalarValue] can be casted
56    /// to a given type that any extension type represented by the metadata is also
57    /// valid.
58    pub fn cast_storage_to(
59        &self,
60        target_type: &DataType,
61    ) -> Result<Self, DataFusionError> {
62        let new_value = self.value().cast_to(target_type)?;
63        Ok(Self::new(new_value, self.metadata.clone()))
64    }
65}
66
67/// create a new ScalarAndMetadata from a ScalarValue without
68/// any metadata
69impl From<ScalarValue> for ScalarAndMetadata {
70    fn from(value: ScalarValue) -> Self {
71        Self::new(value, None)
72    }
73}
74
75/// Assert equality of data types where one or both sides may have field metadata
76///
77/// This currently compares absent metadata (e.g., one side was a DataType) and
78/// empty metadata (e.g., one side was a field where the field had no metadata)
79/// as equal and uses byte-for-byte comparison for the keys and values of the
80/// fields, even though this is potentially too strict for some cases (e.g.,
81/// extension types where extension metadata is represented by JSON, or cases
82/// where field metadata is orthogonal to the interpretation of the data type).
83///
84/// Returns a planning error with suitably formatted type representations if
85/// actual and expected do not compare to equal.
86pub fn check_metadata_with_storage_equal(
87    actual: (
88        &DataType,
89        Option<&std::collections::HashMap<String, String>>,
90    ),
91    expected: (
92        &DataType,
93        Option<&std::collections::HashMap<String, String>>,
94    ),
95    what: &str,
96    context: &str,
97) -> Result<(), DataFusionError> {
98    if actual.0 != expected.0 {
99        return _plan_err!(
100            "Expected {what} of type {}, got {}{context}",
101            format_type_and_metadata(expected.0, expected.1),
102            format_type_and_metadata(actual.0, actual.1)
103        );
104    }
105
106    let metadata_equal = match (actual.1, expected.1) {
107        (None, None) => true,
108        (None, Some(expected_metadata)) => expected_metadata.is_empty(),
109        (Some(actual_metadata), None) => actual_metadata.is_empty(),
110        (Some(actual_metadata), Some(expected_metadata)) => {
111            actual_metadata == expected_metadata
112        }
113    };
114
115    if !metadata_equal {
116        return _plan_err!(
117            "Expected {what} of type {}, got {}{context}",
118            format_type_and_metadata(expected.0, expected.1),
119            format_type_and_metadata(actual.0, actual.1)
120        );
121    }
122
123    Ok(())
124}
125
126/// Given a data type represented by storage and optional metadata, generate
127/// a user-facing string
128///
129/// This function exists to reduce the number of Field debug strings that are
130/// used to communicate type information in error messages and plan explain
131/// renderings.
132pub fn format_type_and_metadata(
133    data_type: &DataType,
134    metadata: Option<&std::collections::HashMap<String, String>>,
135) -> String {
136    match metadata {
137        Some(metadata) if !metadata.is_empty() => {
138            format!("{data_type}<{metadata:?}>")
139        }
140        _ => data_type.to_string(),
141    }
142}
143
/// Literal metadata
///
/// Stores metadata associated with a literal expression
/// and is designed to be fast to `clone`.
///
/// This structure corresponds to the `metadata` field on [`Field`]: a map of
/// string keys to string values.
///
/// # Example: Create [`FieldMetadata`] from a [`Field`]
/// ```
/// # use std::collections::HashMap;
/// # use datafusion_common::metadata::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true)
/// #  .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())]));
/// // Create a new `FieldMetadata` instance from a `Field`
/// let metadata = FieldMetadata::new_from_field(&field);
/// // There is also a `From` impl:
/// let metadata = FieldMetadata::from(&field);
/// ```
///
/// # Example: Update a [`Field`] with [`FieldMetadata`]
/// ```
/// # use datafusion_common::metadata::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true);
/// # let metadata = FieldMetadata::new_from_field(&field);
/// // Add any metadata from `FieldMetadata` to `Field`
/// let updated_field = metadata.add_to_field(field);
/// ```
///
/// For more background, please also see the [Implementing User Defined Types and Custom Metadata in DataFusion blog]
///
/// [Implementing User Defined Types and Custom Metadata in DataFusion blog]: https://datafusion.apache.org/blog/2025/09/21/custom-types-using-metadata
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct FieldMetadata {
    /// The inner metadata of a literal expression, which is a map of string
    /// keys to string values.
    ///
    /// Note this is a `BTreeMap` rather than a `HashMap` because `HashMap`
    /// does not implement traits like `Hash` and `PartialOrd`, which are
    /// derived for this type. The map is wrapped in an `Arc` so that cloning
    /// does not copy the entries.
    inner: Arc<BTreeMap<String, String>>,
}
187
188impl Default for FieldMetadata {
189    fn default() -> Self {
190        Self::new_empty()
191    }
192}
193
194impl FieldMetadata {
195    /// Create a new empty metadata instance.
196    pub fn new_empty() -> Self {
197        Self {
198            inner: Arc::new(BTreeMap::new()),
199        }
200    }
201
202    /// Merges two optional `FieldMetadata` instances, overwriting any existing
203    /// keys in `m` with keys from `n` if present.
204    ///
205    /// This function is commonly used in alias operations, particularly for literals
206    /// with metadata. When creating an alias expression, the metadata from the original
207    /// expression (such as a literal) is combined with any metadata specified on the alias.
208    ///
209    /// # Arguments
210    ///
211    /// * `m` - The first metadata (typically from the original expression like a literal)
212    /// * `n` - The second metadata (typically from the alias definition)
213    ///
214    /// # Merge Strategy
215    ///
216    /// - If both metadata instances exist, they are merged with `n` taking precedence
217    /// - Keys from `n` will overwrite keys from `m` if they have the same name
218    /// - If only one metadata instance exists, it is returned unchanged
219    /// - If neither exists, `None` is returned
220    ///
221    /// # Example usage
222    /// ```rust
223    /// use datafusion_common::metadata::FieldMetadata;
224    /// use std::collections::BTreeMap;
225    ///
226    /// // Create metadata for a literal expression
227    /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([
228    ///     ("source".to_string(), "constant".to_string()),
229    ///     ("type".to_string(), "int".to_string()),
230    /// ])));
231    ///
232    /// // Create metadata for an alias
233    /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([
234    ///     ("description".to_string(), "answer".to_string()),
235    ///     ("source".to_string(), "user".to_string()), // This will override literal's "source"
236    /// ])));
237    ///
238    /// // Merge the metadata
239    /// let merged = FieldMetadata::merge_options(
240    ///     literal_metadata.as_ref(),
241    ///     alias_metadata.as_ref(),
242    /// );
243    ///
244    /// // Result contains: {"source": "user", "type": "int", "description": "answer"}
245    /// assert!(merged.is_some());
246    /// ```
247    pub fn merge_options(
248        m: Option<&FieldMetadata>,
249        n: Option<&FieldMetadata>,
250    ) -> Option<FieldMetadata> {
251        match (m, n) {
252            (Some(m), Some(n)) => {
253                let mut merged = m.clone();
254                merged.extend(n.clone());
255                Some(merged)
256            }
257            (Some(m), None) => Some(m.clone()),
258            (None, Some(n)) => Some(n.clone()),
259            (None, None) => None,
260        }
261    }
262
263    /// Create a new metadata instance from a `Field`'s metadata.
264    pub fn new_from_field(field: &Field) -> Self {
265        let inner = field
266            .metadata()
267            .iter()
268            .map(|(k, v)| (k.to_string(), v.to_string()))
269            .collect();
270        Self {
271            inner: Arc::new(inner),
272        }
273    }
274
275    /// Create a new metadata instance from a map of string keys to string values.
276    pub fn new(inner: BTreeMap<String, String>) -> Self {
277        Self {
278            inner: Arc::new(inner),
279        }
280    }
281
282    /// Get the inner metadata as a reference to a `BTreeMap`.
283    pub fn inner(&self) -> &BTreeMap<String, String> {
284        &self.inner
285    }
286
287    /// Return the inner metadata
288    pub fn into_inner(self) -> Arc<BTreeMap<String, String>> {
289        self.inner
290    }
291
292    /// Adds metadata from `other` into `self`, overwriting any existing keys.
293    pub fn extend(&mut self, other: Self) {
294        if other.is_empty() {
295            return;
296        }
297        let other = Arc::unwrap_or_clone(other.into_inner());
298        Arc::make_mut(&mut self.inner).extend(other);
299    }
300
301    /// Returns true if the metadata is empty.
302    pub fn is_empty(&self) -> bool {
303        self.inner.is_empty()
304    }
305
306    /// Returns the number of key-value pairs in the metadata.
307    pub fn len(&self) -> usize {
308        self.inner.len()
309    }
310
311    /// Convert this `FieldMetadata` into a `HashMap<String, String>`
312    pub fn to_hashmap(&self) -> std::collections::HashMap<String, String> {
313        self.inner
314            .iter()
315            .map(|(k, v)| (k.to_string(), v.to_string()))
316            .collect()
317    }
318
319    /// Updates the metadata on the Field with this metadata, if it is not empty.
320    pub fn add_to_field(&self, field: Field) -> Field {
321        if self.inner.is_empty() {
322            return field;
323        }
324
325        field.with_metadata(self.to_hashmap())
326    }
327
328    /// Updates the metadata on the FieldRef with this metadata, if it is not empty.
329    pub fn add_to_field_ref(&self, mut field_ref: FieldRef) -> FieldRef {
330        if self.inner.is_empty() {
331            return field_ref;
332        }
333
334        Arc::make_mut(&mut field_ref).set_metadata(self.to_hashmap());
335        field_ref
336    }
337}
338
339impl From<&Field> for FieldMetadata {
340    fn from(field: &Field) -> Self {
341        Self::new_from_field(field)
342    }
343}
344
345impl From<BTreeMap<String, String>> for FieldMetadata {
346    fn from(inner: BTreeMap<String, String>) -> Self {
347        Self::new(inner)
348    }
349}
350
351impl From<std::collections::HashMap<String, String>> for FieldMetadata {
352    fn from(map: std::collections::HashMap<String, String>) -> Self {
353        Self::new(map.into_iter().collect())
354    }
355}
356
357/// From reference
358impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
359    fn from(map: &std::collections::HashMap<String, String>) -> Self {
360        let inner = map
361            .iter()
362            .map(|(k, v)| (k.to_string(), v.to_string()))
363            .collect();
364        Self::new(inner)
365    }
366}
367
368/// From hashbrown map
369impl From<HashMap<String, String>> for FieldMetadata {
370    fn from(map: HashMap<String, String>) -> Self {
371        let inner = map.into_iter().collect();
372        Self::new(inner)
373    }
374}
375
376impl From<&HashMap<String, String>> for FieldMetadata {
377    fn from(map: &HashMap<String, String>) -> Self {
378        let inner = map
379            .into_iter()
380            .map(|(k, v)| (k.to_string(), v.to_string()))
381            .collect();
382        Self::new(inner)
383    }
384}