vortex_array/
context.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Display;
5use std::sync::Arc;
6
7use itertools::Itertools;
8use parking_lot::RwLock;
9use vortex_error::VortexExpect;
10use vortex_error::VortexResult;
11use vortex_error::vortex_bail;
12use vortex_error::vortex_err;
13use vortex_session::registry::Registry;
14
15use crate::vtable::ArrayVTable;
16
17pub type ArrayContext = VTableContext<ArrayVTable>;
18
19/// A collection of encodings that can be addressed by a u16 positional index.
20/// This is used to map array encodings and layout encodings when reading from a file.
21#[derive(Debug, Clone)]
22pub struct VTableContext<T>(Arc<RwLock<Vec<T>>>);
23
24impl<T: Clone + Eq> VTableContext<T> {
25    pub fn new(encodings: Vec<T>) -> Self {
26        Self(Arc::new(RwLock::new(encodings)))
27    }
28
29    pub fn from_registry_sorted(registry: &Registry<T>) -> Self
30    where
31        T: Display,
32    {
33        let mut encodings: Vec<T> = registry.items().collect();
34        encodings.sort_by_key(|a| a.to_string());
35        Self::new(encodings)
36    }
37
38    pub fn try_from_registry<'a>(
39        registry: &Registry<T>,
40        ids: impl IntoIterator<Item = &'a str>,
41    ) -> VortexResult<Self>
42    where
43        T: Display,
44    {
45        let items: Vec<T> = ids
46            .into_iter()
47            .map(|id| {
48                registry
49                    .find(id)
50                    .ok_or_else(|| vortex_err!("Registry missing encoding with id {}", id))
51            })
52            .try_collect()?;
53        if items.len() > u16::MAX as usize {
54            vortex_bail!(
55                "Cannot create VTableContext: registry has more than u16::MAX ({}) items",
56                u16::MAX
57            );
58        }
59        Ok(Self::new(items))
60    }
61
62    pub fn empty() -> Self {
63        Self(Arc::new(RwLock::new(Vec::new())))
64    }
65
66    pub fn with(self, encoding: T) -> Self {
67        {
68            let mut write = self.0.write();
69            if write.iter().all(|e| e != &encoding) {
70                write.push(encoding);
71            }
72        }
73        self
74    }
75
76    pub fn with_many<E: IntoIterator<Item = T>>(self, items: E) -> Self {
77        items.into_iter().fold(self, |ctx, e| ctx.with(e))
78    }
79
80    pub fn encodings(&self) -> Vec<T> {
81        self.0.read().clone()
82    }
83
84    /// Returns the index of the encoding in the context, or adds it if it doesn't exist.
85    ///
86    /// At write time the order encodings are registered by this method can change.
87    /// See [File Format specification](https://docs.vortex.rs/specs/file-format#file-determinism-and-reproducibility)
88    /// for more details.
89    pub fn encoding_idx(&self, encoding: &T) -> u16 {
90        let mut write = self.0.write();
91        if let Some(idx) = write.iter().position(|e| e == encoding) {
92            return u16::try_from(idx).vortex_expect("Cannot have more than u16::MAX encodings");
93        }
94        assert!(
95            write.len() < u16::MAX as usize,
96            "Cannot have more than u16::MAX encodings"
97        );
98        write.push(encoding.clone());
99        u16::try_from(write.len() - 1).vortex_expect("checked already")
100    }
101
102    /// Find an encoding by its position.
103    pub fn lookup_encoding(&self, idx: u16) -> Option<T> {
104        self.0.read().get(idx as usize).cloned()
105    }
106}