kermit_ds/ds/tree_trie/
implementation.rs

1use {
2    crate::relation::{Relation, RelationHeader},
3    kermit_iters::{JoinIterable, TrieIterable},
4    std::ops::{Index, IndexMut},
5};
6
7/// A node in the trie data structure.
8#[derive(Clone, Debug)]
9pub struct TrieNode {
10    key: usize,
11    children: Vec<TrieNode>,
12}
13
14impl TrieNode {
15    pub(crate) fn new(key: usize) -> Self {
16        Self {
17            key,
18            children: vec![],
19        }
20    }
21
22    pub(crate) fn key(&self) -> usize { self.key }
23
24    pub(crate) fn children(&self) -> &Vec<TrieNode> { &self.children }
25
26    pub(crate) fn children_mut(&mut self) -> &mut Vec<TrieNode> { &mut self.children }
27
28    pub(crate) fn insert_internal(&mut self, tuple: Vec<usize>) -> bool {
29        if tuple.is_empty() {
30            return true;
31        }
32
33        let current_children = self.children_mut();
34        let mut key_iter = tuple.into_iter();
35
36        if let Some(key) = key_iter.next() {
37            // Find insertion point or existing node
38            let insert_pos = current_children.binary_search_by(|node| node.key().cmp(&key));
39
40            match insert_pos {
41                | Ok(pos) => {
42                    // Key exists, continue with its children
43                    current_children[pos].insert_internal(key_iter.collect())
44                },
45                | Err(pos) => {
46                    // Key doesn't exist, insert new node
47                    let mut new_node = TrieNode::new(key);
48                    new_node.insert_internal(key_iter.collect());
49                    current_children.insert(pos, new_node);
50                    true
51                },
52            }
53        } else {
54            true
55        }
56    }
57}
58
59impl Index<usize> for TrieNode {
60    type Output = TrieNode;
61
62    fn index(&self, index: usize) -> &Self::Output { &self.children[index] }
63}
64
65impl IndexMut<usize> for TrieNode {
66    fn index_mut(&mut self, index: usize) -> &mut Self::Output { &mut self.children[index] }
67}
68
69/// Trie data structure for relations.
70#[derive(Clone, Debug)]
71pub struct TreeTrie {
72    header: RelationHeader,
73    children: Vec<TrieNode>,
74}
75
76impl TreeTrie {
77    pub(crate) fn children(&self) -> &Vec<TrieNode> { &self.children }
78
79    pub(crate) fn children_mut(&mut self) -> &mut Vec<TrieNode> { &mut self.children }
80
81    pub(crate) fn insert_internal(&mut self, tuple: Vec<usize>) -> bool {
82        if tuple.is_empty() {
83            return true;
84        }
85
86        let current_children = self.children_mut();
87        let mut key_iter = tuple.into_iter();
88
89        if let Some(key) = key_iter.next() {
90            // Find insertion point or existing node
91            let insert_pos = current_children.binary_search_by(|node| node.key().cmp(&key));
92
93            match insert_pos {
94                | Ok(pos) => {
95                    // Key exists, continue with its children
96                    current_children[pos].insert_internal(key_iter.collect())
97                },
98                | Err(pos) => {
99                    // Key doesn't exist, insert new node
100                    let mut new_node = TrieNode::new(key);
101                    new_node.insert_internal(key_iter.collect());
102                    current_children.insert(pos, new_node);
103                    true
104                },
105            }
106        } else {
107            true
108        }
109    }
110}
111
112impl Relation for TreeTrie {
113    fn header(&self) -> &RelationHeader { &self.header }
114
115    fn new(header: RelationHeader) -> Self {
116        Self {
117            header,
118            children: vec![],
119        }
120    }
121
122    fn from_tuples(header: RelationHeader, mut tuples: Vec<Vec<usize>>) -> Self {
123        if tuples.is_empty() {
124            return Self::new(header);
125        }
126
127        let arity = tuples[0].len();
128        assert!(tuples.iter().all(|tuple| tuple.len() == arity));
129
130        // Sort tuples for efficient insertion
131        tuples.sort_unstable_by(|a, b| {
132            for i in 0..a.len() {
133                match a[i].cmp(&b[i]) {
134                    | std::cmp::Ordering::Less => return std::cmp::Ordering::Less,
135                    | std::cmp::Ordering::Greater => return std::cmp::Ordering::Greater,
136                    | std::cmp::Ordering::Equal => continue,
137                }
138            }
139            std::cmp::Ordering::Equal
140        });
141
142        let mut trie = Self::new(header);
143        for tuple in tuples {
144            if !trie.insert(tuple) {
145                panic!("Failed to build from tuples.");
146            }
147        }
148        trie
149    }
150
151    fn insert(&mut self, tuple: Vec<usize>) -> bool {
152        if tuple.len() != self.header().arity() {
153            panic!("Arity doesn't match.");
154        }
155        self.insert_internal(tuple)
156    }
157
158    fn insert_all(&mut self, tuples: Vec<Vec<usize>>) -> bool {
159        for tuple in tuples {
160            if !self.insert(tuple) {
161                panic!("Failed to insert tuple.");
162            }
163        }
164        true
165    }
166}
167
168impl JoinIterable for TreeTrie {}
169
170impl crate::relation::Projectable for TreeTrie {
171    fn project(&self, columns: Vec<usize>) -> Self {
172        // Create a new header based on the current header but with projected attributes
173        let current_header = self.header();
174        let projected_attrs: Vec<String> = columns
175            .iter()
176            .filter_map(|&col_idx| current_header.attrs().get(col_idx).cloned())
177            .collect();
178
179        let new_header = if projected_attrs.is_empty() {
180            // If no named attributes, create a positional header
181            crate::relation::RelationHeader::new_nameless_positional(columns.len())
182        } else {
183            // Create a header with the projected attributes
184            crate::relation::RelationHeader::new_nameless(projected_attrs)
185        };
186
187        // Collect all tuples from the current relation using the iterator
188        let all_tuples: Vec<Vec<usize>> = self.trie_iter().into_iter().collect();
189
190        // Project each tuple to the specified columns
191        let projected_tuples: Vec<Vec<usize>> = all_tuples
192            .into_iter()
193            .map(|tuple| columns.iter().map(|&col_idx| tuple[col_idx]).collect())
194            .collect();
195
196        // Create new relation from projected tuples
197        Self::from_tuples(new_header, projected_tuples)
198    }
199}