datafusion_common/metadata.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::BTreeMap, sync::Arc};
19
20use arrow::datatypes::{DataType, Field};
21use hashbrown::HashMap;
22
23use crate::{error::_plan_err, DataFusionError, ScalarValue};
24
25/// A [`ScalarValue`] with optional [`FieldMetadata`]
26#[derive(Debug, Clone)]
27pub struct ScalarAndMetadata {
28 pub value: ScalarValue,
29 pub metadata: Option<FieldMetadata>,
30}
31
32impl ScalarAndMetadata {
33 /// Create a new Literal from a scalar value with optional [`FieldMetadata`]
34 pub fn new(value: ScalarValue, metadata: Option<FieldMetadata>) -> Self {
35 Self { value, metadata }
36 }
37
38 /// Access the underlying [ScalarValue] storage
39 pub fn value(&self) -> &ScalarValue {
40 &self.value
41 }
42
43 /// Access the [FieldMetadata] attached to this value, if any
44 pub fn metadata(&self) -> Option<&FieldMetadata> {
45 self.metadata.as_ref()
46 }
47
48 /// Consume self and return components
49 pub fn into_inner(self) -> (ScalarValue, Option<FieldMetadata>) {
50 (self.value, self.metadata)
51 }
52
53 /// Cast this values's storage type
54 ///
55 /// This operation assumes that if the underlying [ScalarValue] can be casted
56 /// to a given type that any extension type represented by the metadata is also
57 /// valid.
58 pub fn cast_storage_to(
59 &self,
60 target_type: &DataType,
61 ) -> Result<Self, DataFusionError> {
62 let new_value = self.value().cast_to(target_type)?;
63 Ok(Self::new(new_value, self.metadata.clone()))
64 }
65}
66
67/// create a new ScalarAndMetadata from a ScalarValue without
68/// any metadata
69impl From<ScalarValue> for ScalarAndMetadata {
70 fn from(value: ScalarValue) -> Self {
71 Self::new(value, None)
72 }
73}
74
75/// Assert equality of data types where one or both sides may have field metadata
76///
77/// This currently compares absent metadata (e.g., one side was a DataType) and
78/// empty metadata (e.g., one side was a field where the field had no metadata)
79/// as equal and uses byte-for-byte comparison for the keys and values of the
80/// fields, even though this is potentially too strict for some cases (e.g.,
81/// extension types where extension metadata is represented by JSON, or cases
82/// where field metadata is orthogonal to the interpretation of the data type).
83///
84/// Returns a planning error with suitably formatted type representations if
85/// actual and expected do not compare to equal.
86pub fn check_metadata_with_storage_equal(
87 actual: (
88 &DataType,
89 Option<&std::collections::HashMap<String, String>>,
90 ),
91 expected: (
92 &DataType,
93 Option<&std::collections::HashMap<String, String>>,
94 ),
95 what: &str,
96 context: &str,
97) -> Result<(), DataFusionError> {
98 if actual.0 != expected.0 {
99 return _plan_err!(
100 "Expected {what} of type {}, got {}{context}",
101 format_type_and_metadata(expected.0, expected.1),
102 format_type_and_metadata(actual.0, actual.1)
103 );
104 }
105
106 let metadata_equal = match (actual.1, expected.1) {
107 (None, None) => true,
108 (None, Some(expected_metadata)) => expected_metadata.is_empty(),
109 (Some(actual_metadata), None) => actual_metadata.is_empty(),
110 (Some(actual_metadata), Some(expected_metadata)) => {
111 actual_metadata == expected_metadata
112 }
113 };
114
115 if !metadata_equal {
116 return _plan_err!(
117 "Expected {what} of type {}, got {}{context}",
118 format_type_and_metadata(expected.0, expected.1),
119 format_type_and_metadata(actual.0, actual.1)
120 );
121 }
122
123 Ok(())
124}
125
126/// Given a data type represented by storage and optional metadata, generate
127/// a user-facing string
128///
129/// This function exists to reduce the number of Field debug strings that are
130/// used to communicate type information in error messages and plan explain
131/// renderings.
132pub fn format_type_and_metadata(
133 data_type: &DataType,
134 metadata: Option<&std::collections::HashMap<String, String>>,
135) -> String {
136 match metadata {
137 Some(metadata) if !metadata.is_empty() => {
138 format!("{data_type}<{metadata:?}>")
139 }
140 _ => data_type.to_string(),
141 }
142}
143
/// Literal metadata
///
/// Stores metadata associated with a literal expression
/// and is designed to be fast to `clone`.
///
/// This structure is used to store metadata associated with a literal expression, and it
/// corresponds to the `metadata` field on [`Field`].
///
/// # Example: Create [`FieldMetadata`] from a [`Field`]
/// ```
/// # use std::collections::HashMap;
/// # use datafusion_common::metadata::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true)
/// #  .with_metadata(HashMap::from([("foo".to_string(), "bar".to_string())]));
/// // Create a new `FieldMetadata` instance from a `Field`
/// let metadata = FieldMetadata::new_from_field(&field);
/// // There is also a `From` impl:
/// let metadata = FieldMetadata::from(&field);
/// ```
///
/// # Example: Update a [`Field`] with [`FieldMetadata`]
/// ```
/// # use datafusion_common::metadata::FieldMetadata;
/// # use arrow::datatypes::{Field, DataType};
/// # let field = Field::new("c1", DataType::Int32, true);
/// # let metadata = FieldMetadata::new_from_field(&field);
/// // Add any metadata from `FieldMetadata` to `Field`
/// let updated_field = metadata.add_to_field(field);
/// ```
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct FieldMetadata {
    /// The inner metadata of a literal expression, which is a map of string
    /// keys to string values.
    ///
    /// Note this is not a `HashMap` because `HashMap` does not provide
    /// implementations for traits like `Hash` and `PartialOrd`, which are
    /// derived for this type.
    ///
    /// The map is held behind an `Arc` so that cloning a `FieldMetadata`
    /// does not copy the entries.
    inner: Arc<BTreeMap<String, String>>,
}
183
184impl Default for FieldMetadata {
185 fn default() -> Self {
186 Self::new_empty()
187 }
188}
189
190impl FieldMetadata {
191 /// Create a new empty metadata instance.
192 pub fn new_empty() -> Self {
193 Self {
194 inner: Arc::new(BTreeMap::new()),
195 }
196 }
197
198 /// Merges two optional `FieldMetadata` instances, overwriting any existing
199 /// keys in `m` with keys from `n` if present.
200 ///
201 /// This function is commonly used in alias operations, particularly for literals
202 /// with metadata. When creating an alias expression, the metadata from the original
203 /// expression (such as a literal) is combined with any metadata specified on the alias.
204 ///
205 /// # Arguments
206 ///
207 /// * `m` - The first metadata (typically from the original expression like a literal)
208 /// * `n` - The second metadata (typically from the alias definition)
209 ///
210 /// # Merge Strategy
211 ///
212 /// - If both metadata instances exist, they are merged with `n` taking precedence
213 /// - Keys from `n` will overwrite keys from `m` if they have the same name
214 /// - If only one metadata instance exists, it is returned unchanged
215 /// - If neither exists, `None` is returned
216 ///
217 /// # Example usage
218 /// ```rust
219 /// use datafusion_common::metadata::FieldMetadata;
220 /// use std::collections::BTreeMap;
221 ///
222 /// // Create metadata for a literal expression
223 /// let literal_metadata = Some(FieldMetadata::from(BTreeMap::from([
224 /// ("source".to_string(), "constant".to_string()),
225 /// ("type".to_string(), "int".to_string()),
226 /// ])));
227 ///
228 /// // Create metadata for an alias
229 /// let alias_metadata = Some(FieldMetadata::from(BTreeMap::from([
230 /// ("description".to_string(), "answer".to_string()),
231 /// ("source".to_string(), "user".to_string()), // This will override literal's "source"
232 /// ])));
233 ///
234 /// // Merge the metadata
235 /// let merged = FieldMetadata::merge_options(
236 /// literal_metadata.as_ref(),
237 /// alias_metadata.as_ref(),
238 /// );
239 ///
240 /// // Result contains: {"source": "user", "type": "int", "description": "answer"}
241 /// assert!(merged.is_some());
242 /// ```
243 pub fn merge_options(
244 m: Option<&FieldMetadata>,
245 n: Option<&FieldMetadata>,
246 ) -> Option<FieldMetadata> {
247 match (m, n) {
248 (Some(m), Some(n)) => {
249 let mut merged = m.clone();
250 merged.extend(n.clone());
251 Some(merged)
252 }
253 (Some(m), None) => Some(m.clone()),
254 (None, Some(n)) => Some(n.clone()),
255 (None, None) => None,
256 }
257 }
258
259 /// Create a new metadata instance from a `Field`'s metadata.
260 pub fn new_from_field(field: &Field) -> Self {
261 let inner = field
262 .metadata()
263 .iter()
264 .map(|(k, v)| (k.to_string(), v.to_string()))
265 .collect();
266 Self {
267 inner: Arc::new(inner),
268 }
269 }
270
271 /// Create a new metadata instance from a map of string keys to string values.
272 pub fn new(inner: BTreeMap<String, String>) -> Self {
273 Self {
274 inner: Arc::new(inner),
275 }
276 }
277
278 /// Get the inner metadata as a reference to a `BTreeMap`.
279 pub fn inner(&self) -> &BTreeMap<String, String> {
280 &self.inner
281 }
282
283 /// Return the inner metadata
284 pub fn into_inner(self) -> Arc<BTreeMap<String, String>> {
285 self.inner
286 }
287
288 /// Adds metadata from `other` into `self`, overwriting any existing keys.
289 pub fn extend(&mut self, other: Self) {
290 if other.is_empty() {
291 return;
292 }
293 let other = Arc::unwrap_or_clone(other.into_inner());
294 Arc::make_mut(&mut self.inner).extend(other);
295 }
296
297 /// Returns true if the metadata is empty.
298 pub fn is_empty(&self) -> bool {
299 self.inner.is_empty()
300 }
301
302 /// Returns the number of key-value pairs in the metadata.
303 pub fn len(&self) -> usize {
304 self.inner.len()
305 }
306
307 /// Convert this `FieldMetadata` into a `HashMap<String, String>`
308 pub fn to_hashmap(&self) -> std::collections::HashMap<String, String> {
309 self.inner
310 .iter()
311 .map(|(k, v)| (k.to_string(), v.to_string()))
312 .collect()
313 }
314
315 /// Updates the metadata on the Field with this metadata, if it is not empty.
316 pub fn add_to_field(&self, field: Field) -> Field {
317 if self.inner.is_empty() {
318 return field;
319 }
320
321 field.with_metadata(self.to_hashmap())
322 }
323}
324
325impl From<&Field> for FieldMetadata {
326 fn from(field: &Field) -> Self {
327 Self::new_from_field(field)
328 }
329}
330
331impl From<BTreeMap<String, String>> for FieldMetadata {
332 fn from(inner: BTreeMap<String, String>) -> Self {
333 Self::new(inner)
334 }
335}
336
337impl From<std::collections::HashMap<String, String>> for FieldMetadata {
338 fn from(map: std::collections::HashMap<String, String>) -> Self {
339 Self::new(map.into_iter().collect())
340 }
341}
342
343/// From reference
344impl From<&std::collections::HashMap<String, String>> for FieldMetadata {
345 fn from(map: &std::collections::HashMap<String, String>) -> Self {
346 let inner = map
347 .iter()
348 .map(|(k, v)| (k.to_string(), v.to_string()))
349 .collect();
350 Self::new(inner)
351 }
352}
353
354/// From hashbrown map
355impl From<HashMap<String, String>> for FieldMetadata {
356 fn from(map: HashMap<String, String>) -> Self {
357 let inner = map.into_iter().collect();
358 Self::new(inner)
359 }
360}
361
362impl From<&HashMap<String, String>> for FieldMetadata {
363 fn from(map: &HashMap<String, String>) -> Self {
364 let inner = map
365 .into_iter()
366 .map(|(k, v)| (k.to_string(), v.to_string()))
367 .collect();
368 Self::new(inner)
369 }
370}