vortex_array/
context.rs

1use std::fmt::Display;
2use std::sync::{Arc, RwLock, RwLockReadGuard};
3
4use itertools::Itertools;
5use vortex_error::{VortexExpect, VortexResult, vortex_err};
6
7use crate::EncodingRef;
8use crate::aliases::hash_map::HashMap;
9use crate::arrays::{
10    BoolEncoding, ChunkedEncoding, ConstantEncoding, DecimalEncoding, ExtensionEncoding,
11    ListEncoding, NullEncoding, PrimitiveEncoding, StructEncoding, VarBinEncoding,
12    VarBinViewEncoding,
13};
14
15/// A collection of array encodings.
16// TODO(ngates): it feels weird that this has interior mutability. I think maybe it shouldn't.
17pub type ArrayContext = VTableContext<EncodingRef>;
18pub type ArrayRegistry = VTableRegistry<EncodingRef>;
19
20impl ArrayRegistry {
21    pub fn canonical_only() -> Self {
22        let mut this = Self::empty();
23
24        // Register the canonical encodings
25        this.register_many([
26            EncodingRef::new_ref(NullEncoding.as_ref()) as EncodingRef,
27            EncodingRef::new_ref(BoolEncoding.as_ref()),
28            EncodingRef::new_ref(PrimitiveEncoding.as_ref()),
29            EncodingRef::new_ref(DecimalEncoding.as_ref()),
30            EncodingRef::new_ref(StructEncoding.as_ref()),
31            EncodingRef::new_ref(ListEncoding.as_ref()),
32            EncodingRef::new_ref(VarBinEncoding.as_ref()),
33            EncodingRef::new_ref(VarBinViewEncoding.as_ref()),
34            EncodingRef::new_ref(ExtensionEncoding.as_ref()),
35        ]);
36
37        // Register the utility encodings
38        this.register_many([
39            EncodingRef::new_ref(ConstantEncoding.as_ref()) as EncodingRef,
40            EncodingRef::new_ref(ChunkedEncoding.as_ref()),
41        ]);
42
43        this
44    }
45}
46
47/// A collection of encodings that can be addressed by a u16 positional index.
48/// This is used to map array encodings and layout encodings when reading from a file.
49#[derive(Debug, Clone)]
50pub struct VTableContext<T>(Arc<RwLock<Vec<T>>>);
51
52impl<T: Clone + Eq> VTableContext<T> {
53    pub fn empty() -> Self {
54        Self(Arc::new(RwLock::new(Vec::new())))
55    }
56
57    pub fn with(self, encoding: T) -> Self {
58        {
59            let mut write = self.0.write().vortex_expect("poisoned lock");
60            if write.iter().all(|e| e != &encoding) {
61                write.push(encoding);
62            }
63        }
64        self
65    }
66
67    pub fn with_many<E: IntoIterator<Item = T>>(self, items: E) -> Self {
68        items.into_iter().fold(self, |ctx, e| ctx.with(e))
69    }
70
71    pub fn encodings(&self) -> RwLockReadGuard<Vec<T>> {
72        self.0.read().vortex_expect("poisoned lock")
73    }
74
75    /// Returns the index of the encoding in the context, or adds it if it doesn't exist.
76    pub fn encoding_idx(&self, encoding: &T) -> u16 {
77        let mut write = self.0.write().vortex_expect("poisoned lock");
78        if let Some(idx) = write.iter().position(|e| e == encoding) {
79            return u16::try_from(idx).vortex_expect("Cannot have more than u16::MAX encodings");
80        }
81        assert!(
82            write.len() < u16::MAX as usize,
83            "Cannot have more than u16::MAX encodings"
84        );
85        write.push(encoding.clone());
86        u16::try_from(write.len() - 1).vortex_expect("checked already")
87    }
88
89    /// Find an encoding by its position.
90    pub fn lookup_encoding(&self, idx: u16) -> Option<T> {
91        self.0
92            .read()
93            .vortex_expect("poisoned lock")
94            .get(idx as usize)
95            .cloned()
96    }
97}
98
99/// A registry of encodings that can be used to construct a context for serde.
100///
101/// In the future, we will support loading encodings from shared libraries or even from within
102/// the Vortex file itself. This registry will be used to manage the available encodings.
103#[derive(Debug)]
104pub struct VTableRegistry<T>(HashMap<String, T>);
105
106impl<T: Clone + Display + Eq> VTableRegistry<T> {
107    pub fn empty() -> Self {
108        Self(Default::default())
109    }
110
111    /// Create a new [`VTableContext`] with the provided encodings.
112    pub fn new_context<'a>(
113        &self,
114        encoding_ids: impl Iterator<Item = &'a str>,
115    ) -> VortexResult<VTableContext<T>> {
116        let mut ctx = VTableContext::<T>::empty();
117        for id in encoding_ids {
118            let encoding = self.0.get(id).ok_or_else(|| {
119                vortex_err!(
120                    "Array encoding {} not found in registry {}",
121                    id,
122                    self.0.values().join(", ")
123                )
124            })?;
125            ctx = ctx.with(encoding.clone());
126        }
127        Ok(ctx)
128    }
129
130    /// List the vtables in the registry.
131    pub fn vtables(&self) -> impl Iterator<Item = &T> + '_ {
132        self.0.values()
133    }
134
135    /// Register a new encoding, replacing any existing encoding with the same ID.
136    pub fn register(&mut self, encoding: T) {
137        self.0.insert(encoding.to_string(), encoding);
138    }
139
140    /// Register a new encoding, replacing any existing encoding with the same ID.
141    pub fn register_many<I: IntoIterator<Item = T>>(&mut self, encodings: I) {
142        self.0
143            .extend(encodings.into_iter().map(|e| (e.to_string(), e)));
144    }
145}