trs_dataframe/dataframe/
index.rs

1use std::hash::Hasher;
2
3use data_value::DataValue;
4use halfbrown::HashMap;
5
6use super::{column_store::ColumnFrame, Key};
7
/// Compact key type wrapping a single 64-bit hash value.
///
/// Equality, ordering and copying all operate on that one `u64`; nothing
/// else is stored, so two keys are equal exactly when their hashes are equal.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct VecIndex {
    /// The stored 64-bit hash this key is compared by.
    hash: u64,
}
12
13impl VecIndex {
14    pub fn new(value: &[DataValue]) -> Self {
15        let mut hasher = std::hash::DefaultHasher::new();
16        for s in value.iter() {
17            hasher.write(format!("{s}").as_bytes());
18        }
19        Self {
20            hash: hasher.finish(),
21        }
22    }
23}
24impl std::hash::Hash for VecIndex {
25    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
26        state.write_u64(self.hash);
27    }
28}
29
30impl From<&[DataValue]> for VecIndex {
31    fn from(value: &[DataValue]) -> Self {
32        Self::new(value)
33    }
34}
35
36#[derive(Debug)]
37pub struct Index {
38    index: HashMap<VecIndex, usize>,
39}
40
41impl Index {
42    /// Create the index for the given keys and the [`ColumnFrame`] for the given keys.
43    /// This will enumerate the values and store them in the index with current values
44    pub fn new(key: Vec<Key>, df: &ColumnFrame) -> Self {
45        let selected = df.select(Some(key.as_slice()));
46        let mut this = Self {
47            index: HashMap::new(),
48        };
49
50        for (index, candidate) in selected.rows().into_iter().enumerate() {
51            this.index.insert(
52                VecIndex::from(candidate.as_slice().expect("BUG: this should get slice")),
53                index,
54            );
55        }
56        this
57    }
58
59    pub fn get(&self, values: &[DataValue]) -> Option<usize> {
60        self.index.get(&VecIndex::from(values)).cloned()
61    }
62
63    pub fn join(self, other: Index) -> Vec<(usize, Option<usize>)> {
64        let mut output = Vec::with_capacity(self.index.len());
65        for (index, left_index) in self.index.into_iter() {
66            let idx = other.index.get(&index);
67            output.push((left_index, idx.cloned()));
68        }
69
70        output
71    }
72}