vortex_array/
context.rs

1use std::fmt::Display;
2use std::sync::{Arc, RwLock, RwLockReadGuard};
3
4use itertools::Itertools;
5use vortex_error::{VortexExpect, VortexResult, vortex_err};
6
7use crate::aliases::hash_map::HashMap;
8use crate::arrays::{
9    BoolEncoding, ChunkedEncoding, ConstantEncoding, DecimalEncoding, ExtensionEncoding,
10    ListEncoding, NullEncoding, PrimitiveEncoding, StructEncoding, VarBinEncoding,
11    VarBinViewEncoding,
12};
13use crate::encoding::Encoding;
14use crate::vtable::VTableRef;
15
16/// A collection of array encodings.
17// TODO(ngates): it feels weird that this has interior mutability. I think maybe it shouldn't.
18pub type ArrayContext = VTableContext<VTableRef>;
19pub type ArrayRegistry = VTableRegistry<VTableRef>;
20
21impl ArrayRegistry {
22    pub fn canonical_only() -> Self {
23        let mut this = Self::empty();
24
25        // Register the canonical encodings
26        this.register_many([
27            NullEncoding.vtable(),
28            BoolEncoding.vtable(),
29            PrimitiveEncoding.vtable(),
30            DecimalEncoding.vtable(),
31            StructEncoding.vtable(),
32            ListEncoding.vtable(),
33            VarBinEncoding.vtable(),
34            VarBinViewEncoding.vtable(),
35            ExtensionEncoding.vtable(),
36        ]);
37
38        // Register the utility encodings
39        this.register_many([ConstantEncoding.vtable(), ChunkedEncoding.vtable()]);
40
41        this
42    }
43}
44
45/// A collection of encodings that can be addressed by a u16 positional index.
46/// This is used to map array encodings and layout encodings when reading from a file.
47#[derive(Debug, Clone)]
48pub struct VTableContext<T>(Arc<RwLock<Vec<T>>>);
49
50impl<T: Clone + Eq> VTableContext<T> {
51    pub fn empty() -> Self {
52        Self(Arc::new(RwLock::new(Vec::new())))
53    }
54
55    pub fn with(self, encoding: T) -> Self {
56        {
57            let mut write = self.0.write().vortex_expect("poisoned lock");
58            if write.iter().all(|e| e != &encoding) {
59                write.push(encoding);
60            }
61        }
62        self
63    }
64
65    pub fn with_many<E: IntoIterator<Item = T>>(self, items: E) -> Self {
66        items.into_iter().fold(self, |ctx, e| ctx.with(e))
67    }
68
69    pub fn encodings(&self) -> RwLockReadGuard<Vec<T>> {
70        self.0.read().vortex_expect("poisoned lock")
71    }
72
73    /// Returns the index of the encoding in the context, or adds it if it doesn't exist.
74    pub fn encoding_idx(&self, encoding: &T) -> u16 {
75        let mut write = self.0.write().vortex_expect("poisoned lock");
76        if let Some(idx) = write.iter().position(|e| e == encoding) {
77            return u16::try_from(idx).vortex_expect("Cannot have more than u16::MAX encodings");
78        }
79        assert!(
80            write.len() < u16::MAX as usize,
81            "Cannot have more than u16::MAX encodings"
82        );
83        write.push(encoding.clone());
84        u16::try_from(write.len() - 1).vortex_expect("checked already")
85    }
86
87    /// Find an encoding by its position.
88    pub fn lookup_encoding(&self, idx: u16) -> Option<T> {
89        self.0
90            .read()
91            .vortex_expect("poisoned lock")
92            .get(idx as usize)
93            .cloned()
94    }
95}
96
97/// A registry of encodings that can be used to construct a context for serde.
98///
99/// In the future, we will support loading encodings from shared libraries or even from within
100/// the Vortex file itself. This registry will be used to manage the available encodings.
101#[derive(Debug)]
102pub struct VTableRegistry<T>(HashMap<String, T>);
103
104impl<T: Clone + Display + Eq> VTableRegistry<T> {
105    pub fn empty() -> Self {
106        Self(Default::default())
107    }
108
109    /// Create a new [`VTableContext`] with the provided encodings.
110    pub fn new_context<'a>(
111        &self,
112        encoding_ids: impl Iterator<Item = &'a str>,
113    ) -> VortexResult<VTableContext<T>> {
114        let mut ctx = VTableContext::<T>::empty();
115        for id in encoding_ids {
116            let encoding = self.0.get(id).ok_or_else(|| {
117                vortex_err!(
118                    "Array encoding {} not found in registry {}",
119                    id,
120                    self.0.values().join(", ")
121                )
122            })?;
123            ctx = ctx.with(encoding.clone());
124        }
125        Ok(ctx)
126    }
127
128    /// List the vtables in the registry.
129    pub fn vtables(&self) -> impl Iterator<Item = &T> + '_ {
130        self.0.values()
131    }
132
133    /// Register a new encoding, replacing any existing encoding with the same ID.
134    pub fn register(&mut self, encoding: T) {
135        self.0.insert(encoding.to_string(), encoding);
136    }
137
138    /// Register a new encoding, replacing any existing encoding with the same ID.
139    pub fn register_many<I: IntoIterator<Item = T>>(&mut self, encodings: I) {
140        self.0
141            .extend(encodings.into_iter().map(|e| (e.to_string(), e)));
142    }
143}