datafusion_common/
metadata.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::BTreeMap, sync::Arc};
19
20use arrow::datatypes::{DataType, Field};
21use hashbrown::HashMap;
22
23use crate::{error::_plan_err, DataFusionError, ScalarValue};
24
25/// A [`ScalarValue`] with optional [`FieldMetadata`]
26#[derive(Debug, Clone)]
27pub struct ScalarAndMetadata {
28    pub value: ScalarValue,
29    pub metadata: Option<FieldMetadata>,
30}
31
32impl ScalarAndMetadata {
33    /// Create a new Literal from a scalar value with optional [`FieldMetadata`]
34    pub fn new(value: ScalarValue, metadata: Option<FieldMetadata>) -> Self {
35        Self { value, metadata }
36    }
37
38    /// Access the underlying [ScalarValue] storage
39    pub fn value(&self) -> &ScalarValue {
40        &self.value
41    }
42
43    /// Access the [FieldMetadata] attached to this value, if any
44    pub fn metadata(&self) -> Option<&FieldMetadata> {
45        self.metadata.as_ref()
46    }
47
48    /// Consume self and return components
49    pub fn into_inner(self) -> (ScalarValue, Option<FieldMetadata>) {
50        (self.value, self.metadata)
51    }
52
53    /// Cast this values's storage type
54    ///
55    /// This operation assumes that if the underlying [ScalarValue] can be casted
56    /// to a given type that any extension type represented by the metadata is also
57    /// valid.
58    pub fn cast_storage_to(
59        &self,
60        target_type: &DataType,
61    ) -> Result<Self, DataFusionError> {
62        let new_value = self.value().cast_to(target_type)?;
63        Ok(Self::new(new_value, self.metadata.clone()))
64    }
65}
66
67/// create a new ScalarAndMetadata from a ScalarValue without
68/// any metadata
69impl From<ScalarValue> for ScalarAndMetadata {
70    fn from(value: ScalarValue) -> Self {
71        Self::new(value, None)
72    }
73}
74
75/// Assert equality of data types where one or both sides may have field metadata
76///
77/// This currently compares absent metadata (e.g., one side was a DataType) and
78/// empty metadata (e.g., one side was a field where the field had no metadata)
79/// as equal and uses byte-for-byte comparison for the keys and values of the
80/// fields, even though this is potentially too strict for some cases (e.g.,
81/// extension types where extension metadata is represented by JSON, or cases
82/// where field metadata is orthogonal to the interpretation of the data type).
83///
84/// Returns a planning error with suitably formatted type representations if
85/// actual and expected do not compare to equal.
86pub fn check_metadata_with_storage_equal(
87    actual: (
88        &DataType,
89        Option<&std::collections::HashMap<String, String>>,
90    ),
91    expected: (
92        &DataType,
93        Option<&std::collections::HashMap<String, String>>,
94    ),
95    what: &str,
96    context: &str,
97) -> Result<(), DataFusionError> {
98    if actual.0 != expected.0 {
99        return _plan_err!(
100            "Expected {what} of type {}, got {}{context}",
101            format_type_and_metadata(expected.0, expected.1),
102            format_type_and_metadata(actual.0, actual.1)
103        );
104    }
105
106    let metadata_equal = match (actual.1, expected.1) {
107        (None, None) => true,
108        (None, Some(expected_metadata)) => expected_metadata.is_empty(),
109        (Some(actual_metadata), None) => actual_metadata.is_empty(),
110        (Some(actual_metadata), Some(expected_metadata)) => {
111            actual_metadata == expected_metadata
112        }
113    };
114
115    if !metadata_equal {
116        return _plan_err!(
117            "Expected {what} of type {}, got {}{context}",
118            format_type_and_metadata(expected.0, expected.1),
119            format_type_and_metadata(actual.0, actual.1)
120        );
121    }
122
123    Ok(())
124}
125
126/// Given a data type represented by storage and optional metadata, generate
127/// a user-facing string
128///
129/// This function exists to reduce the number of Field debug strings that are
130/// used to communicate type information in error messages and plan explain
131/// renderings.
132pub fn format_type_and_metadata(
133    data_type: &DataType,
134    metadata: Option<&std::collections::HashMap<String, String>>,
135) -> String {
136    match metadata {
137        Some(metadata) if !metadata.is_empty() => {
138            format!("{data_type}<{metadata:?}>")
139        }
140        _ => data_type.to_string(),
141    }
142}
143
/// Literal metadata
///
/// Holds the metadata associated with a literal expression as a map of
/// string keys to string values, mirroring the `metadata` field on
/// [`Field`]. The map lives behind an [`Arc`], so `clone` only bumps a
/// reference count instead of copying the entries.
///
/// # Example: Create [`FieldMetadata`] from a [`Field`]
/// ```
/// # use std::collections::HashMap;
/// # use datafusion_common::metadata::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true)
/// #  .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())]));
/// // Create a new `FieldMetadata` instance from a `Field`
/// let metadata = FieldMetadata::new_from_field(&field);
/// // There is also a `From` impl:
/// let metadata = FieldMetadata::from(&field);
/// ```
///
/// # Example: Update a [`Field`] with [`FieldMetadata`]
/// ```
/// # use datafusion_common::metadata::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true);
/// # let metadata = FieldMetadata::new_from_field(&field);
/// // Add any metadata from `FieldMetadata` to `Field`
/// let updated_field = metadata.add_to_field(field);
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
pub struct FieldMetadata {
    /// The inner metadata of a literal expression: string keys mapped to
    /// string values.
    ///
    /// This is a `BTreeMap` rather than a `HashMap` because the traits
    /// derived on this struct include `Hash` and `PartialOrd`, which
    /// `HashMap` does not implement.
    inner: Arc<BTreeMap<String, String>>,
}
183
184impl Default for FieldMetadata {
185    fn default() -> Self {
186        Self::new_empty()
187    }
188}
189
190impl FieldMetadata {
191    /// Create a new empty metadata instance.
192    pub fn new_empty() -> Self {
193        Self {
194            inner: Arc::new(BTreeMap::new()),
195        }
196    }
197
198    /// Merges two optional `FieldMetadata` instances, overwriting any existing
199    /// keys in `m` with keys from `n` if present.
200    ///
201    /// This function is commonly used in alias operations, particularly for literals
202    /// with metadata. When creating an alias expression, the metadata from the original
203    /// expression (such as a literal) is combined with any metadata specified on the alias.
204    ///
205    /// # Arguments
206    ///
207    /// * `m` - The first metadata (typically from the original expression like a literal)
208    /// * `n` - The second metadata (typically from the alias definition)
209    ///
210    /// # Merge Strategy
211    ///
212    /// - If both metadata instances exist, they are merged with `n` taking precedence
213    /// - Keys from `n` will overwrite keys from `m` if they have the same name
214    /// - If only one metadata instance exists, it is returned unchanged
215    /// - If neither exists, `None` is returned
216    ///
217    /// # Example usage
218    /// ```rust
219    /// use datafusion_common::metadata::FieldMetadata;
220    /// use std::collections::BTreeMap;
221    ///
222    /// // Create metadata for a literal expression
223    /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([
224    ///     ("source".to_string(), "constant".to_string()),
225    ///     ("type".to_string(), "int".to_string()),
226    /// ])));
227    ///
228    /// // Create metadata for an alias
229    /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([
230    ///     ("description".to_string(), "answer".to_string()),
231    ///     ("source".to_string(), "user".to_string()), // This will override literal's "source"
232    /// ])));
233    ///
234    /// // Merge the metadata
235    /// let merged = FieldMetadata::merge_options(
236    ///     literal_metadata.as_ref(),
237    ///     alias_metadata.as_ref(),
238    /// );
239    ///
240    /// // Result contains: {"source": "user", "type": "int", "description": "answer"}
241    /// assert!(merged.is_some());
242    /// ```
243    pub fn merge_options(
244        m: Option<&FieldMetadata>,
245        n: Option<&FieldMetadata>,
246    ) -> Option<FieldMetadata> {
247        match (m, n) {
248            (Some(m), Some(n)) => {
249                let mut merged = m.clone();
250                merged.extend(n.clone());
251                Some(merged)
252            }
253            (Some(m), None) => Some(m.clone()),
254            (None, Some(n)) => Some(n.clone()),
255            (None, None) => None,
256        }
257    }
258
259    /// Create a new metadata instance from a `Field`'s metadata.
260    pub fn new_from_field(field: &Field) -> Self {
261        let inner = field
262            .metadata()
263            .iter()
264            .map(|(k, v)| (k.to_string(), v.to_string()))
265            .collect();
266        Self {
267            inner: Arc::new(inner),
268        }
269    }
270
271    /// Create a new metadata instance from a map of string keys to string values.
272    pub fn new(inner: BTreeMap<String, String>) -> Self {
273        Self {
274            inner: Arc::new(inner),
275        }
276    }
277
278    /// Get the inner metadata as a reference to a `BTreeMap`.
279    pub fn inner(&self) -> &BTreeMap<String, String> {
280        &self.inner
281    }
282
283    /// Return the inner metadata
284    pub fn into_inner(self) -> Arc<BTreeMap<String, String>> {
285        self.inner
286    }
287
288    /// Adds metadata from `other` into `self`, overwriting any existing keys.
289    pub fn extend(&mut self, other: Self) {
290        if other.is_empty() {
291            return;
292        }
293        let other = Arc::unwrap_or_clone(other.into_inner());
294        Arc::make_mut(&mut self.inner).extend(other);
295    }
296
297    /// Returns true if the metadata is empty.
298    pub fn is_empty(&self) -> bool {
299        self.inner.is_empty()
300    }
301
302    /// Returns the number of key-value pairs in the metadata.
303    pub fn len(&self) -> usize {
304        self.inner.len()
305    }
306
307    /// Convert this `FieldMetadata` into a `HashMap<String, String>`
308    pub fn to_hashmap(&self) -> std::collections::HashMap<String, String> {
309        self.inner
310            .iter()
311            .map(|(k, v)| (k.to_string(), v.to_string()))
312            .collect()
313    }
314
315    /// Updates the metadata on the Field with this metadata, if it is not empty.
316    pub fn add_to_field(&self, field: Field) -> Field {
317        if self.inner.is_empty() {
318            return field;
319        }
320
321        field.with_metadata(self.to_hashmap())
322    }
323}
324
325impl From<&Field> for FieldMetadata {
326    fn from(field: &Field) -> Self {
327        Self::new_from_field(field)
328    }
329}
330
331impl From<BTreeMap<String, String>> for FieldMetadata {
332    fn from(inner: BTreeMap<String, String>) -> Self {
333        Self::new(inner)
334    }
335}
336
337impl From<std::collections::HashMap<String, String>> for FieldMetadata {
338    fn from(map: std::collections::HashMap<String, String>) -> Self {
339        Self::new(map.into_iter().collect())
340    }
341}
342
343/// From reference
344impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
345    fn from(map: &std::collections::HashMap<String, String>) -> Self {
346        let inner = map
347            .iter()
348            .map(|(k, v)| (k.to_string(), v.to_string()))
349            .collect();
350        Self::new(inner)
351    }
352}
353
354/// From hashbrown map
355impl From<HashMap<String, String>> for FieldMetadata {
356    fn from(map: HashMap<String, String>) -> Self {
357        let inner = map.into_iter().collect();
358        Self::new(inner)
359    }
360}
361
362impl From<&HashMap<String, String>> for FieldMetadata {
363    fn from(map: &HashMap<String, String>) -> Self {
364        let inner = map
365            .into_iter()
366            .map(|(k, v)| (k.to_string(), v.to_string()))
367            .collect();
368        Self::new(inner)
369    }
370}