Skip to main content

datafusion_common/
extensions.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! A type-keyed map of opaque, `Arc`'d objects.
19//!
20//! Used as the backing store for the various `extensions` fields throughout
21//! DataFusion (e.g. [`SessionConfig`], [`ExtendedStatistics`],
22//! [`PartitionedFile`]) so that independent components can each attach
23//! their own data without conflict, each keyed by its concrete Rust type.
24//!
25//! [`SessionConfig`]: https://docs.rs/datafusion-execution/latest/datafusion_execution/config/struct.SessionConfig.html
26//! [`ExtendedStatistics`]: https://docs.rs/datafusion-physical-plan/latest/datafusion_physical_plan/operator_statistics/struct.ExtendedStatistics.html
27//! [`PartitionedFile`]: https://docs.rs/datafusion-datasource/latest/datafusion_datasource/struct.PartitionedFile.html
28
29use std::any::{Any, TypeId};
30use std::collections::HashMap;
31use std::hash::{BuildHasherDefault, Hasher};
32use std::sync::Arc;
33
34/// A type-keyed map of opaque `Arc`'d values. Each Rust type `T` occupies
35/// its own slot, so independent components can each attach their own data
36/// without conflict.
37///
38/// Cloning is cheap: the backing values are reference-counted.
39///
40/// # Example
41///
42/// ```
43/// # use std::sync::Arc;
44/// # use datafusion_common::extensions::Extensions;
45/// struct MyData(u32);
46/// struct OtherData(&'static str);
47///
48/// let mut ext = Extensions::new();
49/// ext.insert(MyData(42));
50/// ext.insert_arc(Arc::new(OtherData("hello")));
51///
52/// assert_eq!(ext.get::<MyData>().unwrap().0, 42);
53/// assert_eq!(ext.get::<OtherData>().unwrap().0, "hello");
54/// ```
55#[derive(Debug, Clone, Default)]
56pub struct Extensions {
57    inner: HashMap<TypeId, Arc<dyn Any + Send + Sync>, BuildHasherDefault<IdHasher>>,
58}
59
60impl Extensions {
61    /// Create an empty map.
62    pub fn new() -> Self {
63        Self::default()
64    }
65
66    /// Returns true if no extensions are set.
67    pub fn is_empty(&self) -> bool {
68        self.inner.is_empty()
69    }
70
71    /// Number of extensions set.
72    pub fn len(&self) -> usize {
73        self.inner.len()
74    }
75
76    /// Insert an extension keyed by its concrete type `T`. Returns the
77    /// previous value of that type, if any.
78    ///
79    /// The value is wrapped in an [`Arc`] internally. If the caller already
80    /// has an `Arc<T>` and wants to avoid an extra allocation, use
81    /// [`Self::insert_arc`].
82    pub fn insert<T: Any + Send + Sync>(&mut self, value: T) -> Option<Arc<T>> {
83        self.insert_arc(Arc::new(value))
84    }
85
86    /// Insert an extension keyed by its concrete type `T`, taking an
87    /// already-allocated [`Arc<T>`]. Returns the previous value of that type,
88    /// if any.
89    pub fn insert_arc<T: Any + Send + Sync>(&mut self, value: Arc<T>) -> Option<Arc<T>> {
90        self.inner
91            .insert(TypeId::of::<T>(), value)
92            .map(|p| Arc::downcast::<T>(p).expect("TypeId matches T"))
93    }
94
95    /// Insert an already-type-erased value, keyed by its dynamic
96    /// [`TypeId`]. Used internally to support APIs that accept
97    /// `Arc<dyn Any + Send + Sync>` for backwards compatibility and need
98    /// to recover the concrete type for keying.
99    ///
100    /// New code should use [`Self::insert`] or [`Self::insert_arc`], which
101    /// preserve the concrete type at the call site.
102    #[deprecated(
103        since = "54.0.0",
104        note = "use `insert` or `insert_arc`; only retained to support the deprecated `PartitionedFile::with_extensions` shim"
105    )]
106    pub fn insert_dyn(
107        &mut self,
108        value: Arc<dyn Any + Send + Sync>,
109    ) -> Option<Arc<dyn Any + Send + Sync>> {
110        let id = (*value).type_id();
111        self.inner.insert(id, value)
112    }
113
114    /// Borrow the extension of type `T`, if set.
115    pub fn get<T: Any + Send + Sync>(&self) -> Option<&T> {
116        self.inner
117            .get(&TypeId::of::<T>())
118            .and_then(|a| a.downcast_ref::<T>())
119    }
120
121    /// Get a cloned `Arc<T>` of the extension, if set.
122    pub fn get_arc<T: Any + Send + Sync>(&self) -> Option<Arc<T>> {
123        self.inner
124            .get(&TypeId::of::<T>())
125            .map(|a| Arc::downcast::<T>(Arc::clone(a)).expect("TypeId matches T"))
126    }
127
128    /// Returns true if an extension of type `T` is set.
129    pub fn contains<T: Any + Send + Sync>(&self) -> bool {
130        self.inner.contains_key(&TypeId::of::<T>())
131    }
132
133    /// Merge entries from `other` into `self`. Entries in `other` take
134    /// precedence over existing entries with the same type.
135    pub fn merge(&mut self, other: &Extensions) {
136        for (id, ext) in &other.inner {
137            self.inner.insert(*id, Arc::clone(ext));
138        }
139    }
140}
141
/// Hasher specialized for [`TypeId`] keys. Since `TypeId` is already a
/// hash produced by the compiler, we don't need to hash it again — we
/// just store the `u64` it writes and return it unchanged.
///
/// The fast path relies on `TypeId`'s `Hash` impl calling
/// [`Hasher::write_u64`]. That is an implementation detail of the standard
/// library rather than a documented guarantee, so [`Hasher::write`] folds
/// the bytes it receives into the state instead of panicking. The fallback
/// only has to be deterministic; it is not expected to run in practice.
#[derive(Default)]
struct IdHasher(u64);

impl Hasher for IdHasher {
    /// Fallback for any input that does not arrive through `write_u64`.
    ///
    /// Mixes each byte into the current state with an FNV-1a style
    /// xor-then-multiply step, so equal byte sequences always produce equal
    /// hashes.
    fn write(&mut self, bytes: &[u8]) {
        // 64-bit FNV prime.
        const PRIME: u64 = 0x0000_0100_0000_01b3;
        for &byte in bytes {
            self.0 = (self.0 ^ u64::from(byte)).wrapping_mul(PRIME);
        }
    }

    /// Fast path: keep the value as-is, it is already a hash.
    #[inline]
    fn write_u64(&mut self, id: u64) {
        self.0 = id;
    }

    /// Return the stored state unchanged.
    #[inline]
    fn finish(&self) -> u64 {
        self.0
    }
}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// Numeric marker type occupying its own extension slot.
    #[derive(Debug, PartialEq)]
    struct A(u32);

    /// String marker type occupying a second, independent slot.
    #[derive(Debug, PartialEq)]
    struct B(&'static str);

    /// Values land in per-type slots, and inserting the same type again
    /// replaces the slot while handing back the old value.
    #[test]
    fn insert_get_replace() {
        let mut extensions = Extensions::new();
        assert!(extensions.is_empty());

        extensions.insert(A(1));
        extensions.insert_arc(Arc::new(B("x")));
        assert_eq!(extensions.len(), 2);
        assert_eq!(extensions.get::<A>(), Some(&A(1)));
        assert_eq!(extensions.get::<B>(), Some(&B("x")));
        assert!(extensions.contains::<A>());

        let replaced = extensions.insert(A(2));
        assert_eq!(replaced.as_deref(), Some(&A(1)));
        assert_eq!(extensions.get::<A>(), Some(&A(2)));
    }

    /// A type-erased value is still retrievable through its concrete type.
    #[test]
    #[expect(deprecated)]
    fn insert_dyn_keys_by_concrete_type() {
        let type_erased: Arc<dyn Any + Send + Sync> = Arc::new(A(7));
        let mut extensions = Extensions::new();
        extensions.insert_dyn(type_erased);
        assert_eq!(extensions.get::<A>(), Some(&A(7)));
    }

    /// On a type collision the merged-in map wins; new types are added.
    #[test]
    fn merge_other_wins() {
        let mut incoming = Extensions::new();
        incoming.insert(A(2));
        incoming.insert(B("hi"));

        let mut target = Extensions::new();
        target.insert(A(1));

        target.merge(&incoming);
        assert_eq!(target.get::<A>(), Some(&A(2)));
        assert_eq!(target.get::<B>(), Some(&B("hi")));
    }
}
211}