vortex_array/
context.rs

1use std::fmt::Display;
2use std::sync::{Arc, RwLock, RwLockReadGuard};
3
4use itertools::Itertools;
5use vortex_error::{VortexExpect, VortexResult, vortex_err};
6
7use crate::aliases::hash_map::HashMap;
8use crate::arrays::{
9    BoolEncoding, ChunkedEncoding, ConstantEncoding, ExtensionEncoding, ListEncoding, NullEncoding,
10    PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
11};
12use crate::encoding::Encoding;
13use crate::vtable::VTableRef;
14
15/// A collection of array encodings.
16// TODO(ngates): it feels weird that this has interior mutability. I think maybe it shouldn't.
17pub type ArrayContext = VTableContext<VTableRef>;
18pub type ArrayRegistry = VTableRegistry<VTableRef>;
19
20impl ArrayRegistry {
21    pub fn canonical_only() -> Self {
22        let mut this = Self::empty();
23
24        // Register the canonical encodings
25        this.register_many([
26            NullEncoding.vtable(),
27            BoolEncoding.vtable(),
28            PrimitiveEncoding.vtable(),
29            StructEncoding.vtable(),
30            ListEncoding.vtable(),
31            VarBinEncoding.vtable(),
32            VarBinViewEncoding.vtable(),
33            ExtensionEncoding.vtable(),
34        ]);
35
36        // Register the utility encodings
37        this.register_many([ConstantEncoding.vtable(), ChunkedEncoding.vtable()]);
38
39        this
40    }
41}
42
43/// A collection of encodings that can be addressed by a u16 positional index.
44/// This is used to map array encodings and layout encodings when reading from a file.
45#[derive(Debug, Clone)]
46pub struct VTableContext<T>(Arc<RwLock<Vec<T>>>);
47
48impl<T: Clone + Eq> VTableContext<T> {
49    pub fn empty() -> Self {
50        Self(Arc::new(RwLock::new(Vec::new())))
51    }
52
53    pub fn with(self, encoding: T) -> Self {
54        {
55            let mut write = self.0.write().vortex_expect("poisoned lock");
56            if write.iter().all(|e| e != &encoding) {
57                write.push(encoding);
58            }
59        }
60        self
61    }
62
63    pub fn with_many<E: IntoIterator<Item = T>>(self, items: E) -> Self {
64        items.into_iter().fold(self, |ctx, e| ctx.with(e))
65    }
66
67    pub fn encodings(&self) -> RwLockReadGuard<Vec<T>> {
68        self.0.read().vortex_expect("poisoned lock")
69    }
70
71    /// Returns the index of the encoding in the context, or adds it if it doesn't exist.
72    pub fn encoding_idx(&self, encoding: &T) -> u16 {
73        let mut write = self.0.write().vortex_expect("poisoned lock");
74        if let Some(idx) = write.iter().position(|e| e == encoding) {
75            return u16::try_from(idx).vortex_expect("Cannot have more than u16::MAX encodings");
76        }
77        assert!(
78            write.len() < u16::MAX as usize,
79            "Cannot have more than u16::MAX encodings"
80        );
81        write.push(encoding.clone());
82        u16::try_from(write.len() - 1).vortex_expect("checked already")
83    }
84
85    /// Find an encoding by its position.
86    pub fn lookup_encoding(&self, idx: u16) -> Option<T> {
87        self.0
88            .read()
89            .vortex_expect("poisoned lock")
90            .get(idx as usize)
91            .cloned()
92    }
93}
94
95/// A registry of encodings that can be used to construct a context for serde.
96///
97/// In the future, we will support loading encodings from shared libraries or even from within
98/// the Vortex file itself. This registry will be used to manage the available encodings.
99#[derive(Debug)]
100pub struct VTableRegistry<T>(HashMap<String, T>);
101
102impl<T: Clone + Display + Eq> VTableRegistry<T> {
103    pub fn empty() -> Self {
104        Self(Default::default())
105    }
106
107    /// Create a new [`VTableContext`] with the provided encodings.
108    pub fn new_context<'a>(
109        &self,
110        encoding_ids: impl Iterator<Item = &'a str>,
111    ) -> VortexResult<VTableContext<T>> {
112        let mut ctx = VTableContext::<T>::empty();
113        for id in encoding_ids {
114            let encoding = self.0.get(id).ok_or_else(|| {
115                vortex_err!(
116                    "Array encoding {} not found in registry {}",
117                    id,
118                    self.0.values().join(", ")
119                )
120            })?;
121            ctx = ctx.with(encoding.clone());
122        }
123        Ok(ctx)
124    }
125
126    /// List the vtables in the registry.
127    pub fn vtables(&self) -> impl Iterator<Item = &T> + '_ {
128        self.0.values()
129    }
130
131    /// Register a new encoding, replacing any existing encoding with the same ID.
132    pub fn register(&mut self, encoding: T) {
133        self.0.insert(encoding.to_string(), encoding);
134    }
135
136    /// Register a new encoding, replacing any existing encoding with the same ID.
137    pub fn register_many<I: IntoIterator<Item = T>>(&mut self, encodings: I) {
138        encodings.into_iter().for_each(|encoding| {
139            self.0.insert(encoding.to_string(), encoding);
140        });
141    }
142}