Skip to main content

vortex_session/
registry.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Many session types use a registry of objects that can be looked up by name to construct
5//! contexts. This module provides a generic registry type for that purpose.
6
7use std::fmt::Debug;
8use std::ops::Deref;
9use std::sync::Arc;
10
11use arcref::ArcRef;
12use parking_lot::Mutex;
13use vortex_error::VortexExpect;
14use vortex_utils::aliases::dash_map::DashMap;
15
16/// An identifier for an item in a registry.
17pub type Id = ArcRef<str>;
18
19/// A registry of items that are keyed by a string identifier.
20#[derive(Clone, Debug)]
21pub struct Registry<T>(Arc<DashMap<Id, T>>);
22
23impl<T> Default for Registry<T> {
24    fn default() -> Self {
25        Self(Default::default())
26    }
27}
28
29impl<T: Clone> Registry<T> {
30    pub fn empty() -> Self {
31        Self(Default::default())
32    }
33
34    /// List the IDs in the registry.
35    pub fn ids(&self) -> impl Iterator<Item = Id> + '_ {
36        self.0.iter().map(|i| i.key().clone())
37    }
38
39    /// List the items in the registry.
40    pub fn items(&self) -> impl Iterator<Item = T> + '_ {
41        self.0.iter().map(|i| i.value().clone())
42    }
43
44    /// Return the items with the given IDs.
45    pub fn find_many<'a>(
46        &self,
47        ids: impl IntoIterator<Item = &'a Id>,
48    ) -> impl Iterator<Item = Option<impl Deref<Target = T>>> {
49        ids.into_iter().map(|id| self.0.get(id))
50    }
51
52    /// Find the item with the given ID.
53    pub fn find(&self, id: &Id) -> Option<T> {
54        self.0.get(id).as_deref().cloned()
55    }
56
57    /// Register a new item, replacing any existing item with the same ID.
58    pub fn register(&self, id: impl Into<Id>, item: impl Into<T>) {
59        self.0.insert(id.into(), item.into());
60    }
61
62    /// Register a new item, replacing any existing item with the same ID, and return self for
63    pub fn with(self, id: impl Into<Id>, item: impl Into<T>) -> Self {
64        self.register(id, item.into());
65        self
66    }
67}
68
69/// A [`Context`] holds a set of interned IDs for use during serialization/deserialization, mapping
70/// IDs to u16 indices.
71///
72/// ## Upcoming Changes
73///
74/// 1. This object holds an Arc of Mutex internally because we need concurrent access from the
75///    layout writer code path. We should update SegmentSink to take an Array rather than
76///    ByteBuffer such that serializing arrays is done sequentially.
77/// 2. The name is terrible. `Interner<T>` is better, but I want to minimize breakage for now.
78#[derive(Clone, Debug)]
79pub struct Context<T> {
80    // TODO(ngates): it's a long story, but if we make SegmentSink and SegmentSource take an
81    //  enum of Segment { Array, DType, Buffer } then we don't actually need a mutable context
82    //  in the LayoutWriter, therefore we don't need a Mutex here and everyone is happier.
83    ids: Arc<Mutex<Vec<Id>>>,
84    // Optional registry used to filter the permissible interned items.
85    registry: Option<Registry<T>>,
86}
87
88impl<T> Default for Context<T> {
89    fn default() -> Self {
90        Self {
91            ids: Arc::new(Mutex::new(Vec::new())),
92            registry: None,
93        }
94    }
95}
96
97impl<T: Clone> Context<T> {
98    /// Create a context with the given initial IDs.
99    pub fn new(ids: Vec<Id>) -> Self {
100        Self {
101            ids: Arc::new(Mutex::new(ids)),
102            registry: None,
103        }
104    }
105
106    /// Create an empty context.
107    pub fn empty() -> Self {
108        Self::default()
109    }
110
111    /// Configure a registry to restrict the permissible set of interned items.
112    pub fn with_registry(mut self, registry: Registry<T>) -> Self {
113        self.registry = Some(registry);
114        self
115    }
116
117    /// Intern an ID, returning its index.
118    pub fn intern(&self, id: &Id) -> Option<u16> {
119        if let Some(registry) = &self.registry
120            && registry.find(id).is_none()
121        {
122            // ID not in registry, cannot intern.
123            return None;
124        }
125
126        let mut ids = self.ids.lock();
127        if let Some(idx) = ids.iter().position(|e| e == id) {
128            return Some(u16::try_from(idx).vortex_expect("Cannot have more than u16::MAX items"));
129        }
130
131        let idx = ids.len();
132        assert!(
133            idx < u16::MAX as usize,
134            "Cannot have more than u16::MAX items"
135        );
136        ids.push(id.clone());
137        Some(u16::try_from(idx).vortex_expect("checked already"))
138    }
139
140    /// Resolve an interned ID by its index.
141    pub fn resolve(&self, idx: u16) -> Option<Id> {
142        self.ids.lock().get(idx as usize).cloned()
143    }
144
145    /// Get the list of interned IDs.
146    pub fn to_ids(&self) -> Vec<Id> {
147        self.ids.lock().clone()
148    }
149}