reddb_server/storage/query/executors/
set_ops.rs1use std::collections::hash_map::DefaultHasher;
18use std::collections::HashSet;
19use std::hash::{Hash, Hasher};
20
21use super::super::engine::binding::{Binding, Value};
22
23#[derive(Debug, Clone, Copy, PartialEq, Eq)]
25pub enum SetOpType {
26 Union,
28 UnionAll,
30 Intersect,
32 Except,
34}
35
36#[derive(Debug, Clone, Default)]
38pub struct SetOpStats {
39 pub left_size: usize,
41 pub right_size: usize,
43 pub result_size: usize,
45 pub duplicates_removed: usize,
47}
48
49pub fn execute_set_op(
51 left: Vec<Binding>,
52 right: Vec<Binding>,
53 op_type: SetOpType,
54) -> (Vec<Binding>, SetOpStats) {
55 let left_size = left.len();
56 let right_size = right.len();
57
58 let (result, duplicates_removed) = match op_type {
59 SetOpType::Union => set_union(left, right, true),
60 SetOpType::UnionAll => set_union(left, right, false),
61 SetOpType::Intersect => (set_intersect(left, right), 0),
62 SetOpType::Except => (set_except(left, right), 0),
63 };
64
65 let stats = SetOpStats {
66 left_size,
67 right_size,
68 result_size: result.len(),
69 duplicates_removed,
70 };
71
72 (result, stats)
73}
74
75pub fn set_union(
80 left: Vec<Binding>,
81 right: Vec<Binding>,
82 deduplicate: bool,
83) -> (Vec<Binding>, usize) {
84 if !deduplicate {
85 let mut result = left;
87 result.extend(right);
88 return (result, 0);
89 }
90
91 let mut seen: HashSet<u64> = HashSet::new();
93 let mut result: Vec<Binding> = Vec::new();
94 let mut duplicates = 0;
95
96 for binding in left.into_iter().chain(right) {
97 let hash = binding_hash(&binding);
98 if seen.insert(hash) {
99 result.push(binding);
100 } else {
101 duplicates += 1;
102 }
103 }
104
105 (result, duplicates)
106}
107
108pub fn set_intersect(left: Vec<Binding>, right: Vec<Binding>) -> Vec<Binding> {
112 let right_hashes: HashSet<u64> = right.iter().map(binding_hash).collect();
114
115 let mut seen: HashSet<u64> = HashSet::new();
117 let mut result: Vec<Binding> = Vec::new();
118
119 for binding in left {
120 let hash = binding_hash(&binding);
121 if right_hashes.contains(&hash) && seen.insert(hash) {
122 result.push(binding);
123 }
124 }
125
126 result
127}
128
129pub fn set_except(left: Vec<Binding>, right: Vec<Binding>) -> Vec<Binding> {
133 let right_hashes: HashSet<u64> = right.iter().map(binding_hash).collect();
135
136 let mut seen: HashSet<u64> = HashSet::new();
138 let mut result: Vec<Binding> = Vec::new();
139
140 for binding in left {
141 let hash = binding_hash(&binding);
142 if !right_hashes.contains(&hash) && seen.insert(hash) {
143 result.push(binding);
144 }
145 }
146
147 result
148}
149
150fn binding_hash(binding: &Binding) -> u64 {
152 let mut hasher = DefaultHasher::new();
153
154 let mut vars: Vec<_> = binding.all_vars();
156 vars.sort_by_key(|v| v.name());
157
158 for var in vars {
159 var.name().hash(&mut hasher);
160 if let Some(value) = binding.get(var) {
161 hash_value(value, &mut hasher);
162 } else {
163 "unbound".hash(&mut hasher);
164 }
165 }
166
167 hasher.finish()
168}
169
170fn hash_value(value: &Value, hasher: &mut DefaultHasher) {
171 match value {
172 Value::Node(id) => {
173 "node".hash(hasher);
174 id.hash(hasher);
175 }
176 Value::Edge(id) => {
177 "edge".hash(hasher);
178 id.hash(hasher);
179 }
180 Value::String(s) => {
181 "string".hash(hasher);
182 s.hash(hasher);
183 }
184 Value::Integer(i) => {
185 "int".hash(hasher);
186 i.hash(hasher);
187 }
188 Value::Float(f) => {
189 "float".hash(hasher);
190 f.to_bits().hash(hasher);
191 }
192 Value::Boolean(b) => {
193 "bool".hash(hasher);
194 b.hash(hasher);
195 }
196 Value::Uri(u) => {
197 "uri".hash(hasher);
198 u.hash(hasher);
199 }
200 Value::Null => {
201 "null".hash(hasher);
202 }
203 }
204}
205
206#[cfg(test)]
211mod tests {
212 use super::super::super::engine::binding::Var;
213 use super::*;
214
215 fn make_binding(pairs: &[(&str, &str)]) -> Binding {
216 if pairs.is_empty() {
217 return Binding::empty();
218 }
219
220 let mut result = Binding::one(Var::new(pairs[0].0), Value::String(pairs[0].1.to_string()));
221
222 for (k, v) in pairs.iter().skip(1) {
223 let next = Binding::one(Var::new(k), Value::String(v.to_string()));
224 result = result.merge(&next).unwrap_or(result);
225 }
226
227 result
228 }
229
230 #[test]
231 fn test_union_all() {
232 let left = vec![make_binding(&[("x", "a")]), make_binding(&[("x", "b")])];
233 let right = vec![make_binding(&[("x", "b")]), make_binding(&[("x", "c")])];
234
235 let (result, stats) = execute_set_op(left, right, SetOpType::UnionAll);
236 assert_eq!(result.len(), 4);
237 assert_eq!(stats.duplicates_removed, 0);
238 }
239
240 #[test]
241 fn test_union_distinct() {
242 let left = vec![make_binding(&[("x", "a")]), make_binding(&[("x", "b")])];
243 let right = vec![make_binding(&[("x", "b")]), make_binding(&[("x", "c")])];
244
245 let (result, stats) = execute_set_op(left, right, SetOpType::Union);
246 assert_eq!(result.len(), 3); assert_eq!(stats.duplicates_removed, 1); }
249
250 #[test]
251 fn test_intersect() {
252 let left = vec![
253 make_binding(&[("x", "a")]),
254 make_binding(&[("x", "b")]),
255 make_binding(&[("x", "c")]),
256 ];
257 let right = vec![
258 make_binding(&[("x", "b")]),
259 make_binding(&[("x", "c")]),
260 make_binding(&[("x", "d")]),
261 ];
262
263 let (result, stats) = execute_set_op(left, right, SetOpType::Intersect);
264 assert_eq!(result.len(), 2); assert_eq!(stats.left_size, 3);
266 assert_eq!(stats.right_size, 3);
267 }
268
269 #[test]
270 fn test_except() {
271 let left = vec![
272 make_binding(&[("x", "a")]),
273 make_binding(&[("x", "b")]),
274 make_binding(&[("x", "c")]),
275 ];
276 let right = vec![make_binding(&[("x", "b")])];
277
278 let (result, stats) = execute_set_op(left, right, SetOpType::Except);
279 assert_eq!(result.len(), 2); assert_eq!(stats.left_size, 3);
281 assert_eq!(stats.right_size, 1);
282 }
283
284 #[test]
285 fn test_intersect_empty() {
286 let left = vec![make_binding(&[("x", "a")])];
287 let right = vec![make_binding(&[("x", "b")])];
288
289 let (result, _) = execute_set_op(left, right, SetOpType::Intersect);
290 assert!(result.is_empty());
291 }
292
293 #[test]
294 fn test_except_complete() {
295 let left = vec![make_binding(&[("x", "a")])];
296 let right = vec![make_binding(&[("x", "a")])];
297
298 let (result, _) = execute_set_op(left, right, SetOpType::Except);
299 assert!(result.is_empty());
300 }
301}