Skip to main content

vortex_session/
registry.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! Many session types use a registry of objects that can be looked up by name to construct
5//! contexts. This module provides a generic registry type for that purpose.
6
7use std::fmt::Debug;
8use std::ops::Deref;
9use std::sync::Arc;
10
11use arcref::ArcRef;
12use parking_lot::RwLock;
13use vortex_error::VortexExpect;
14use vortex_utils::aliases::dash_map::DashMap;
15
16/// An identifier for an item in a registry.
17pub type Id = ArcRef<str>;
18
19/// A registry of items that are keyed by a string identifier.
20#[derive(Clone, Debug)]
21pub struct Registry<T>(Arc<DashMap<Id, T>>);
22
23impl<T> Default for Registry<T> {
24    fn default() -> Self {
25        Self(Default::default())
26    }
27}
28
29impl<T: Clone> Registry<T> {
30    pub fn empty() -> Self {
31        Self(Default::default())
32    }
33
34    /// List the IDs in the registry.
35    pub fn ids(&self) -> impl Iterator<Item = Id> + '_ {
36        self.0.iter().map(|i| i.key().clone())
37    }
38
39    /// List the items in the registry.
40    pub fn items(&self) -> impl Iterator<Item = T> + '_ {
41        self.0.iter().map(|i| i.value().clone())
42    }
43
44    /// Return the items with the given IDs.
45    pub fn find_many<'a>(
46        &self,
47        ids: impl IntoIterator<Item = &'a Id>,
48    ) -> impl Iterator<Item = Option<impl Deref<Target = T>>> {
49        ids.into_iter().map(|id| self.0.get(id))
50    }
51
52    /// Find the item with the given ID.
53    pub fn find(&self, id: &Id) -> Option<T> {
54        self.0.get(id).as_deref().cloned()
55    }
56
57    /// Register a new item, replacing any existing item with the same ID.
58    pub fn register(&self, id: impl Into<Id>, item: impl Into<T>) {
59        self.0.insert(id.into(), item.into());
60    }
61
62    /// Register a new item, replacing any existing item with the same ID, and return self for
63    pub fn with(self, id: impl Into<Id>, item: impl Into<T>) -> Self {
64        self.register(id, item.into());
65        self
66    }
67}
68
69/// A [`ReadContext`] holds a set of interned IDs for use during deserialization, mapping
70/// u16 indices to IDs.
71#[derive(Clone, Debug)]
72pub struct ReadContext {
73    ids: Arc<[Id]>,
74}
75
76impl ReadContext {
77    /// Create a context with the given initial IDs.
78    pub fn new(ids: impl Into<Arc<[Id]>>) -> Self {
79        Self { ids: ids.into() }
80    }
81
82    /// Resolve an interned ID by its index.
83    pub fn resolve(&self, idx: u16) -> Option<Id> {
84        self.ids.get(idx as usize).cloned()
85    }
86
87    pub fn ids(&self) -> &[Id] {
88        &self.ids
89    }
90}
91
92/// A [`Context`] holds a set of interned IDs for use during serialization/deserialization, mapping
93/// IDs to u16 indices.
94///
95/// ## Upcoming Changes
96///
97/// 1. This object holds an Arc of RwLock internally because we need concurrent access from the
98///    layout writer code path. We should update SegmentSink to take an Array rather than
99///    ByteBuffer such that serializing arrays is done sequentially.
100/// 2. The name is terrible. `Interner<T>` is better, but I want to minimize breakage for now.
101#[derive(Clone, Debug)]
102pub struct Context<T> {
103    // TODO(ngates): it's a long story, but if we make SegmentSink and SegmentSource take an
104    //  enum of Segment { Array, DType, Buffer } then we don't actually need a mutable context
105    //  in the LayoutWriter, therefore we don't need a RwLock here and everyone is happier.
106    ids: Arc<RwLock<Vec<Id>>>,
107    // Optional registry used to filter the permissible interned items.
108    registry: Option<Registry<T>>,
109}
110
111impl<T> Default for Context<T> {
112    fn default() -> Self {
113        Self {
114            ids: Arc::new(RwLock::new(Vec::new())),
115            registry: None,
116        }
117    }
118}
119
120impl<T: Clone> Context<T> {
121    /// Create a context with the given initial IDs.
122    pub fn new(ids: Vec<Id>) -> Self {
123        Self {
124            ids: Arc::new(RwLock::new(ids)),
125            registry: None,
126        }
127    }
128
129    /// Create an empty context.
130    pub fn empty() -> Self {
131        Self::default()
132    }
133
134    /// Configure a registry to restrict the permissible set of interned items.
135    pub fn with_registry(mut self, registry: Registry<T>) -> Self {
136        self.registry = Some(registry);
137        self
138    }
139
140    /// Intern an ID, returning its index.
141    pub fn intern(&self, id: &Id) -> Option<u16> {
142        if let Some(registry) = &self.registry
143            && registry.find(id).is_none()
144        {
145            // ID not in registry, cannot intern.
146            return None;
147        }
148
149        let mut ids = self.ids.write();
150        if let Some(idx) = ids.iter().position(|e| e == id) {
151            return Some(u16::try_from(idx).vortex_expect("Cannot have more than u16::MAX items"));
152        }
153
154        let idx = ids.len();
155        assert!(
156            idx < u16::MAX as usize,
157            "Cannot have more than u16::MAX items"
158        );
159        ids.push(id.clone());
160        Some(u16::try_from(idx).vortex_expect("checked already"))
161    }
162
163    /// Get the list of interned IDs.
164    pub fn to_ids(&self) -> Vec<Id> {
165        self.ids.read().clone()
166    }
167}