vortex_array/
context.rs

1use std::fmt::Display;
2use std::sync::Arc;
3
4use itertools::Itertools;
5use parking_lot::RwLock;
6use vortex_error::{VortexExpect, VortexResult, vortex_err};
7use vortex_utils::aliases::hash_map::HashMap;
8
9use crate::EncodingRef;
10use crate::arrays::{
11    BoolEncoding, ChunkedEncoding, ConstantEncoding, DecimalEncoding, ExtensionEncoding,
12    ListEncoding, NullEncoding, PrimitiveEncoding, StructEncoding, VarBinEncoding,
13    VarBinViewEncoding,
14};
15
/// A collection of array encodings: [`VTableContext`] specialised to [`EncodingRef`].
// TODO(ngates): it feels weird that this has interior mutability. I think maybe it shouldn't.
pub type ArrayContext = VTableContext<EncodingRef>;
/// A registry of array encodings: [`VTableRegistry`] specialised to [`EncodingRef`].
pub type ArrayRegistry = VTableRegistry<EncodingRef>;
20
21impl ArrayRegistry {
22    pub fn canonical_only() -> Self {
23        let mut this = Self::empty();
24
25        // Register the canonical encodings
26        this.register_many([
27            EncodingRef::new_ref(NullEncoding.as_ref()) as EncodingRef,
28            EncodingRef::new_ref(BoolEncoding.as_ref()),
29            EncodingRef::new_ref(PrimitiveEncoding.as_ref()),
30            EncodingRef::new_ref(DecimalEncoding.as_ref()),
31            EncodingRef::new_ref(StructEncoding.as_ref()),
32            EncodingRef::new_ref(ListEncoding.as_ref()),
33            EncodingRef::new_ref(VarBinEncoding.as_ref()),
34            EncodingRef::new_ref(VarBinViewEncoding.as_ref()),
35            EncodingRef::new_ref(ExtensionEncoding.as_ref()),
36        ]);
37
38        // Register the utility encodings
39        this.register_many([
40            EncodingRef::new_ref(ConstantEncoding.as_ref()) as EncodingRef,
41            EncodingRef::new_ref(ChunkedEncoding.as_ref()),
42        ]);
43
44        this
45    }
46}
47
48/// A collection of encodings that can be addressed by a u16 positional index.
49/// This is used to map array encodings and layout encodings when reading from a file.
50#[derive(Debug, Clone)]
51pub struct VTableContext<T>(Arc<RwLock<Vec<T>>>);
52
53impl<T: Clone + Eq> VTableContext<T> {
54    pub fn empty() -> Self {
55        Self(Arc::new(RwLock::new(Vec::new())))
56    }
57
58    pub fn with(self, encoding: T) -> Self {
59        {
60            let mut write = self.0.write();
61            if write.iter().all(|e| e != &encoding) {
62                write.push(encoding);
63            }
64        }
65        self
66    }
67
68    pub fn with_many<E: IntoIterator<Item = T>>(self, items: E) -> Self {
69        items.into_iter().fold(self, |ctx, e| ctx.with(e))
70    }
71
72    pub fn encodings(&self) -> Vec<T> {
73        self.0.read().clone()
74    }
75
76    /// Returns the index of the encoding in the context, or adds it if it doesn't exist.
77    pub fn encoding_idx(&self, encoding: &T) -> u16 {
78        let mut write = self.0.write();
79        if let Some(idx) = write.iter().position(|e| e == encoding) {
80            return u16::try_from(idx).vortex_expect("Cannot have more than u16::MAX encodings");
81        }
82        assert!(
83            write.len() < u16::MAX as usize,
84            "Cannot have more than u16::MAX encodings"
85        );
86        write.push(encoding.clone());
87        u16::try_from(write.len() - 1).vortex_expect("checked already")
88    }
89
90    /// Find an encoding by its position.
91    pub fn lookup_encoding(&self, idx: u16) -> Option<T> {
92        self.0.read().get(idx as usize).cloned()
93    }
94}
95
96/// A registry of encodings that can be used to construct a context for serde.
97///
98/// In the future, we will support loading encodings from shared libraries or even from within
99/// the Vortex file itself. This registry will be used to manage the available encodings.
100#[derive(Clone, Debug)]
101pub struct VTableRegistry<T>(HashMap<String, T>);
102
103impl<T: Clone + Display + Eq> VTableRegistry<T> {
104    pub fn empty() -> Self {
105        Self(Default::default())
106    }
107
108    /// Create a new [`VTableContext`] with the provided encodings.
109    pub fn new_context<'a>(
110        &self,
111        encoding_ids: impl Iterator<Item = &'a str>,
112    ) -> VortexResult<VTableContext<T>> {
113        let mut ctx = VTableContext::<T>::empty();
114        for id in encoding_ids {
115            let encoding = self.0.get(id).ok_or_else(|| {
116                vortex_err!(
117                    "Array encoding {} not found in registry {}",
118                    id,
119                    self.0.values().join(", ")
120                )
121            })?;
122            ctx = ctx.with(encoding.clone());
123        }
124        Ok(ctx)
125    }
126
127    /// List the vtables in the registry.
128    pub fn vtables(&self) -> impl Iterator<Item = &T> + '_ {
129        self.0.values()
130    }
131
132    /// Register a new encoding, replacing any existing encoding with the same ID.
133    pub fn register(&mut self, encoding: T) {
134        self.0.insert(encoding.to_string(), encoding);
135    }
136
137    /// Register a new encoding, replacing any existing encoding with the same ID.
138    pub fn register_many<I: IntoIterator<Item = T>>(&mut self, encodings: I) {
139        self.0
140            .extend(encodings.into_iter().map(|e| (e.to_string(), e)));
141    }
142}