vortex_array/
context.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::sync::Arc;
6
7use itertools::Itertools;
8use parking_lot::RwLock;
9use vortex_error::{VortexExpect, VortexResult, vortex_err};
10use vortex_utils::aliases::hash_map::HashMap;
11
12use crate::EncodingRef;
13use crate::arrays::{
14    BoolEncoding, ChunkedEncoding, ConstantEncoding, DecimalEncoding, ExtensionEncoding,
15    FixedSizeListEncoding, ListEncoding, ListViewEncoding, MaskedEncoding, NullEncoding,
16    PrimitiveEncoding, StructEncoding, VarBinEncoding, VarBinViewEncoding,
17};
18
19/// A collection of array encodings.
20// TODO(ngates): it feels weird that this has interior mutability. I think maybe it shouldn't.
21pub type ArrayContext = VTableContext<EncodingRef>;
22pub type ArrayRegistry = VTableRegistry<EncodingRef>;
23
24impl ArrayRegistry {
25    pub fn canonical_only() -> Self {
26        let mut this = Self::empty();
27
28        // Register the canonical encodings.
29        this.register_many([
30            EncodingRef::new_ref(NullEncoding.as_ref()),
31            EncodingRef::new_ref(BoolEncoding.as_ref()),
32            EncodingRef::new_ref(PrimitiveEncoding.as_ref()),
33            EncodingRef::new_ref(DecimalEncoding.as_ref()),
34            EncodingRef::new_ref(VarBinViewEncoding.as_ref()),
35            EncodingRef::new_ref(ListViewEncoding.as_ref()),
36            EncodingRef::new_ref(FixedSizeListEncoding.as_ref()),
37            EncodingRef::new_ref(StructEncoding.as_ref()),
38            EncodingRef::new_ref(ExtensionEncoding.as_ref()),
39        ]);
40
41        // Register the utility encodings.
42        this.register_many([
43            EncodingRef::new_ref(ChunkedEncoding.as_ref()),
44            EncodingRef::new_ref(ConstantEncoding.as_ref()),
45            EncodingRef::new_ref(MaskedEncoding.as_ref()),
46            EncodingRef::new_ref(ListEncoding.as_ref()),
47            EncodingRef::new_ref(VarBinEncoding.as_ref()),
48        ]);
49
50        this
51    }
52}
53
54/// A collection of encodings that can be addressed by a u16 positional index.
55/// This is used to map array encodings and layout encodings when reading from a file.
56#[derive(Debug, Clone)]
57pub struct VTableContext<T>(Arc<RwLock<Vec<T>>>);
58
59impl<T: Clone + Eq> VTableContext<T> {
60    pub fn empty() -> Self {
61        Self(Arc::new(RwLock::new(Vec::new())))
62    }
63
64    pub fn with(self, encoding: T) -> Self {
65        {
66            let mut write = self.0.write();
67            if write.iter().all(|e| e != &encoding) {
68                write.push(encoding);
69            }
70        }
71        self
72    }
73
74    pub fn with_many<E: IntoIterator<Item = T>>(self, items: E) -> Self {
75        items.into_iter().fold(self, |ctx, e| ctx.with(e))
76    }
77
78    pub fn encodings(&self) -> Vec<T> {
79        self.0.read().clone()
80    }
81
82    /// Returns the index of the encoding in the context, or adds it if it doesn't exist.
83    pub fn encoding_idx(&self, encoding: &T) -> u16 {
84        let mut write = self.0.write();
85        if let Some(idx) = write.iter().position(|e| e == encoding) {
86            return u16::try_from(idx).vortex_expect("Cannot have more than u16::MAX encodings");
87        }
88        assert!(
89            write.len() < u16::MAX as usize,
90            "Cannot have more than u16::MAX encodings"
91        );
92        write.push(encoding.clone());
93        u16::try_from(write.len() - 1).vortex_expect("checked already")
94    }
95
96    /// Find an encoding by its position.
97    pub fn lookup_encoding(&self, idx: u16) -> Option<T> {
98        self.0.read().get(idx as usize).cloned()
99    }
100}
101
102/// A registry of encodings that can be used to construct a context for serde.
103///
104/// In the future, we will support loading encodings from shared libraries or even from within
105/// the Vortex file itself. This registry will be used to manage the available encodings.
106#[derive(Clone, Debug)]
107pub struct VTableRegistry<T>(HashMap<String, T>);
108
109// TODO(ngates): define a trait for `T` that requires an `id` method returning a `Arc<str>` and
110//  auto-implement `Display` and `Eq` for it.
111impl<T: Clone + Display + Eq> VTableRegistry<T> {
112    pub fn empty() -> Self {
113        Self(Default::default())
114    }
115
116    /// Create a new [`VTableContext`] with the provided encodings.
117    pub fn new_context<'a>(
118        &self,
119        encoding_ids: impl Iterator<Item = &'a str>,
120    ) -> VortexResult<VTableContext<T>> {
121        let mut ctx = VTableContext::<T>::empty();
122        for id in encoding_ids {
123            let encoding = self.0.get(id).ok_or_else(|| {
124                vortex_err!(
125                    "Array encoding {} not found in registry {}",
126                    id,
127                    self.0.values().join(", ")
128                )
129            })?;
130            ctx = ctx.with(encoding.clone());
131        }
132        Ok(ctx)
133    }
134
135    /// List the vtables in the registry.
136    pub fn vtables(&self) -> impl Iterator<Item = &T> + '_ {
137        self.0.values()
138    }
139
140    /// Find the encoding with the given ID.
141    pub fn get(&self, id: &str) -> Option<&T> {
142        self.0.get(id)
143    }
144
145    /// Register a new encoding, replacing any existing encoding with the same ID.
146    pub fn register(&mut self, encoding: T) {
147        self.0.insert(encoding.to_string(), encoding);
148    }
149
150    /// Register a new encoding, replacing any existing encoding with the same ID.
151    pub fn register_many<I: IntoIterator<Item = T>>(&mut self, encodings: I) {
152        self.0
153            .extend(encodings.into_iter().map(|e| (e.to_string(), e)));
154    }
155}