Skip to main content

reddb_server/storage/query/executors/
set_ops.rs

1//! Set Operations Executor
2//!
3//! Provides UNION, INTERSECT, and EXCEPT operations for query results.
4//!
5//! # Operations
6//!
7//! - **UNION**: Combines results from two queries (with deduplication)
8//! - **UNION ALL**: Combines results without deduplication
9//! - **INTERSECT**: Returns only rows that appear in both queries
10//! - **EXCEPT**: Returns rows from the first query that don't appear in the second
11//!
12//! # Implementation
13//!
14//! Uses hash-based algorithms for O(n+m) performance where n and m are
15//! the sizes of the input result sets.
16
17use std::collections::hash_map::DefaultHasher;
18use std::collections::HashSet;
19use std::hash::{Hash, Hasher};
20
21use super::super::engine::binding::{Binding, Value};
22
23/// Type of set operation
24#[derive(Debug, Clone, Copy, PartialEq, Eq)]
25pub enum SetOpType {
26    /// Union with deduplication
27    Union,
28    /// Union without deduplication
29    UnionAll,
30    /// Intersection
31    Intersect,
32    /// Set difference (EXCEPT)
33    Except,
34}
35
36/// Statistics for set operations
37#[derive(Debug, Clone, Default)]
38pub struct SetOpStats {
39    /// Size of left input
40    pub left_size: usize,
41    /// Size of right input
42    pub right_size: usize,
43    /// Size of result
44    pub result_size: usize,
45    /// Number of duplicates removed (for UNION)
46    pub duplicates_removed: usize,
47}
48
49/// Execute a set operation on two binding vectors
50pub fn execute_set_op(
51    left: Vec<Binding>,
52    right: Vec<Binding>,
53    op_type: SetOpType,
54) -> (Vec<Binding>, SetOpStats) {
55    let left_size = left.len();
56    let right_size = right.len();
57
58    let (result, duplicates_removed) = match op_type {
59        SetOpType::Union => set_union(left, right, true),
60        SetOpType::UnionAll => set_union(left, right, false),
61        SetOpType::Intersect => (set_intersect(left, right), 0),
62        SetOpType::Except => (set_except(left, right), 0),
63    };
64
65    let stats = SetOpStats {
66        left_size,
67        right_size,
68        result_size: result.len(),
69        duplicates_removed,
70    };
71
72    (result, stats)
73}
74
75/// UNION operation
76///
77/// If deduplicate is true, removes duplicate rows (UNION DISTINCT).
78/// If deduplicate is false, keeps all rows (UNION ALL).
79pub fn set_union(
80    left: Vec<Binding>,
81    right: Vec<Binding>,
82    deduplicate: bool,
83) -> (Vec<Binding>, usize) {
84    if !deduplicate {
85        // UNION ALL - just concatenate
86        let mut result = left;
87        result.extend(right);
88        return (result, 0);
89    }
90
91    // UNION with deduplication
92    let mut seen: HashSet<u64> = HashSet::new();
93    let mut result: Vec<Binding> = Vec::new();
94    let mut duplicates = 0;
95
96    for binding in left.into_iter().chain(right) {
97        let hash = binding_hash(&binding);
98        if seen.insert(hash) {
99            result.push(binding);
100        } else {
101            duplicates += 1;
102        }
103    }
104
105    (result, duplicates)
106}
107
108/// INTERSECT operation
109///
110/// Returns only rows that appear in both left and right.
111pub fn set_intersect(left: Vec<Binding>, right: Vec<Binding>) -> Vec<Binding> {
112    // Build hash set of right side
113    let right_hashes: HashSet<u64> = right.iter().map(binding_hash).collect();
114
115    // Filter left to only include those in right
116    let mut seen: HashSet<u64> = HashSet::new();
117    let mut result: Vec<Binding> = Vec::new();
118
119    for binding in left {
120        let hash = binding_hash(&binding);
121        if right_hashes.contains(&hash) && seen.insert(hash) {
122            result.push(binding);
123        }
124    }
125
126    result
127}
128
129/// EXCEPT operation (set difference)
130///
131/// Returns rows from left that don't appear in right.
132pub fn set_except(left: Vec<Binding>, right: Vec<Binding>) -> Vec<Binding> {
133    // Build hash set of right side
134    let right_hashes: HashSet<u64> = right.iter().map(binding_hash).collect();
135
136    // Filter left to only include those NOT in right
137    let mut seen: HashSet<u64> = HashSet::new();
138    let mut result: Vec<Binding> = Vec::new();
139
140    for binding in left {
141        let hash = binding_hash(&binding);
142        if !right_hashes.contains(&hash) && seen.insert(hash) {
143            result.push(binding);
144        }
145    }
146
147    result
148}
149
150/// Compute a deterministic hash for a binding
151fn binding_hash(binding: &Binding) -> u64 {
152    let mut hasher = DefaultHasher::new();
153
154    // Get sorted keys for deterministic ordering
155    let mut vars: Vec<_> = binding.all_vars();
156    vars.sort_by_key(|v| v.name());
157
158    for var in vars {
159        var.name().hash(&mut hasher);
160        if let Some(value) = binding.get(var) {
161            hash_value(value, &mut hasher);
162        } else {
163            "unbound".hash(&mut hasher);
164        }
165    }
166
167    hasher.finish()
168}
169
170fn hash_value(value: &Value, hasher: &mut DefaultHasher) {
171    match value {
172        Value::Node(id) => {
173            "node".hash(hasher);
174            id.hash(hasher);
175        }
176        Value::Edge(id) => {
177            "edge".hash(hasher);
178            id.hash(hasher);
179        }
180        Value::String(s) => {
181            "string".hash(hasher);
182            s.hash(hasher);
183        }
184        Value::Integer(i) => {
185            "int".hash(hasher);
186            i.hash(hasher);
187        }
188        Value::Float(f) => {
189            "float".hash(hasher);
190            f.to_bits().hash(hasher);
191        }
192        Value::Boolean(b) => {
193            "bool".hash(hasher);
194            b.hash(hasher);
195        }
196        Value::Uri(u) => {
197            "uri".hash(hasher);
198            u.hash(hasher);
199        }
200        Value::Null => {
201            "null".hash(hasher);
202        }
203    }
204}
205
206// ============================================================================
207// Tests
208// ============================================================================
209
210#[cfg(test)]
211mod tests {
212    use super::super::super::engine::binding::Var;
213    use super::*;
214
215    fn make_binding(pairs: &[(&str, &str)]) -> Binding {
216        if pairs.is_empty() {
217            return Binding::empty();
218        }
219
220        let mut result = Binding::one(Var::new(pairs[0].0), Value::String(pairs[0].1.to_string()));
221
222        for (k, v) in pairs.iter().skip(1) {
223            let next = Binding::one(Var::new(k), Value::String(v.to_string()));
224            result = result.merge(&next).unwrap_or(result);
225        }
226
227        result
228    }
229
230    #[test]
231    fn test_union_all() {
232        let left = vec![make_binding(&[("x", "a")]), make_binding(&[("x", "b")])];
233        let right = vec![make_binding(&[("x", "b")]), make_binding(&[("x", "c")])];
234
235        let (result, stats) = execute_set_op(left, right, SetOpType::UnionAll);
236        assert_eq!(result.len(), 4);
237        assert_eq!(stats.duplicates_removed, 0);
238    }
239
240    #[test]
241    fn test_union_distinct() {
242        let left = vec![make_binding(&[("x", "a")]), make_binding(&[("x", "b")])];
243        let right = vec![make_binding(&[("x", "b")]), make_binding(&[("x", "c")])];
244
245        let (result, stats) = execute_set_op(left, right, SetOpType::Union);
246        assert_eq!(result.len(), 3); // a, b, c
247        assert_eq!(stats.duplicates_removed, 1); // one 'b' removed
248    }
249
250    #[test]
251    fn test_intersect() {
252        let left = vec![
253            make_binding(&[("x", "a")]),
254            make_binding(&[("x", "b")]),
255            make_binding(&[("x", "c")]),
256        ];
257        let right = vec![
258            make_binding(&[("x", "b")]),
259            make_binding(&[("x", "c")]),
260            make_binding(&[("x", "d")]),
261        ];
262
263        let (result, stats) = execute_set_op(left, right, SetOpType::Intersect);
264        assert_eq!(result.len(), 2); // b, c
265        assert_eq!(stats.left_size, 3);
266        assert_eq!(stats.right_size, 3);
267    }
268
269    #[test]
270    fn test_except() {
271        let left = vec![
272            make_binding(&[("x", "a")]),
273            make_binding(&[("x", "b")]),
274            make_binding(&[("x", "c")]),
275        ];
276        let right = vec![make_binding(&[("x", "b")])];
277
278        let (result, stats) = execute_set_op(left, right, SetOpType::Except);
279        assert_eq!(result.len(), 2); // a, c
280        assert_eq!(stats.left_size, 3);
281        assert_eq!(stats.right_size, 1);
282    }
283
284    #[test]
285    fn test_intersect_empty() {
286        let left = vec![make_binding(&[("x", "a")])];
287        let right = vec![make_binding(&[("x", "b")])];
288
289        let (result, _) = execute_set_op(left, right, SetOpType::Intersect);
290        assert!(result.is_empty());
291    }
292
293    #[test]
294    fn test_except_complete() {
295        let left = vec![make_binding(&[("x", "a")])];
296        let right = vec![make_binding(&[("x", "a")])];
297
298        let (result, _) = execute_set_op(left, right, SetOpType::Except);
299        assert!(result.is_empty());
300    }
301}