Skip to main content

cynos_query/executor/join/
nested.rs

1//! Nested Loop Join implementation.
2
3use crate::executor::{Relation, RelationEntry};
4use alloc::vec::Vec;
5use cynos_core::Value;
6
7/// Nested Loop Join executor.
8///
9/// The simplest join algorithm that compares every pair of rows.
10/// Best for small relations or non-equi joins.
11pub struct NestedLoopJoin {
12    /// Column index for the left relation.
13    left_key_index: usize,
14    /// Column index for the right relation.
15    right_key_index: usize,
16    /// Whether this is an outer join.
17    is_outer_join: bool,
18}
19
20impl NestedLoopJoin {
21    /// Creates a new nested loop join executor.
22    pub fn new(left_key_index: usize, right_key_index: usize, is_outer_join: bool) -> Self {
23        Self {
24            left_key_index,
25            right_key_index,
26            is_outer_join,
27        }
28    }
29
30    /// Creates an inner nested loop join.
31    pub fn inner(left_key_index: usize, right_key_index: usize) -> Self {
32        Self::new(left_key_index, right_key_index, false)
33    }
34
35    /// Creates a left outer nested loop join.
36    pub fn left_outer(left_key_index: usize, right_key_index: usize) -> Self {
37        Self::new(left_key_index, right_key_index, true)
38    }
39
40    /// Executes the nested loop join with equality comparison.
41    pub fn execute(&self, left: Relation, right: Relation) -> Relation {
42        self.execute_with_predicate(left, right, |l, r| l == r)
43    }
44
45    /// Executes the nested loop join with a custom predicate.
46    pub fn execute_with_predicate<F>(&self, left: Relation, right: Relation, predicate: F) -> Relation
47    where
48        F: Fn(&Value, &Value) -> bool,
49    {
50        let mut result_entries = Vec::new();
51        let left_tables = left.tables().to_vec();
52        let right_tables = right.tables().to_vec();
53        let right_col_count = right
54            .entries
55            .first()
56            .map(|e| e.row.len())
57            .unwrap_or(0);
58
59        // Block-based nested loop for better cache performance
60        const BLOCK_SIZE: usize = 256;
61        let right_entries: Vec<_> = right.entries.iter().collect();
62        let block_count = (right_entries.len() + BLOCK_SIZE - 1) / BLOCK_SIZE;
63
64        for left_entry in left.iter() {
65            let mut match_found = false;
66            let left_value = left_entry.get_field(self.left_key_index);
67
68            // Skip if left value is null (nulls don't match)
69            if left_value.map(|v| v.is_null()).unwrap_or(true) {
70                if self.is_outer_join {
71                    let combined = RelationEntry::combine_with_null(
72                        left_entry,
73                        &left_tables,
74                        right_col_count,
75                        &right_tables,
76                    );
77                    result_entries.push(combined);
78                }
79                continue;
80            }
81
82            let left_val = left_value.unwrap();
83
84            // Process in blocks for better cache locality
85            for block in 0..block_count {
86                let start = block * BLOCK_SIZE;
87                let end = core::cmp::min(start + BLOCK_SIZE, right_entries.len());
88
89                for right_entry in &right_entries[start..end] {
90                    if let Some(right_val) = right_entry.get_field(self.right_key_index) {
91                        if !right_val.is_null() && predicate(left_val, right_val) {
92                            match_found = true;
93                            let combined = RelationEntry::combine(
94                                left_entry,
95                                &left_tables,
96                                right_entry,
97                                &right_tables,
98                            );
99                            result_entries.push(combined);
100                        }
101                    }
102                }
103            }
104
105            // For outer join, add unmatched left entries with nulls
106            if self.is_outer_join && !match_found {
107                let combined = RelationEntry::combine_with_null(
108                    left_entry,
109                    &left_tables,
110                    right_col_count,
111                    &right_tables,
112                );
113                result_entries.push(combined);
114            }
115        }
116
117        let mut tables = left_tables;
118        tables.extend(right_tables);
119
120        // Compute combined table column counts
121        let mut table_column_counts = left.table_column_counts().to_vec();
122        table_column_counts.extend(right.table_column_counts().iter().cloned());
123
124        Relation {
125            entries: result_entries,
126            tables,
127            table_column_counts,
128        }
129    }
130}
131
132/// Performs a nested loop join using a predicate function.
133pub fn nested_loop_join<L, R, O, P, OF>(
134    left: &[L],
135    right: &[R],
136    predicate: P,
137    output_fn: OF,
138) -> Vec<O>
139where
140    P: Fn(&L, &R) -> bool,
141    OF: Fn(&L, &R) -> O,
142{
143    let mut results = Vec::new();
144
145    for l in left {
146        for r in right {
147            if predicate(l, r) {
148                results.push(output_fn(l, r));
149            }
150        }
151    }
152
153    results
154}
155
156#[cfg(test)]
157mod tests {
158    use super::*;
159    use cynos_core::Row;
160    use alloc::vec;
161
162    #[test]
163    fn test_nested_loop_join_inner() {
164        let left_rows = vec![
165            Row::new(0, vec![Value::Int64(1), Value::String("A".into())]),
166            Row::new(1, vec![Value::Int64(2), Value::String("B".into())]),
167            Row::new(2, vec![Value::Int64(3), Value::String("C".into())]),
168        ];
169        let right_rows = vec![
170            Row::new(10, vec![Value::Int64(1), Value::String("X".into())]),
171            Row::new(11, vec![Value::Int64(2), Value::String("Y".into())]),
172            Row::new(12, vec![Value::Int64(4), Value::String("Z".into())]),
173        ];
174
175        let left = Relation::from_rows_owned(left_rows, vec!["left".into()]);
176        let right = Relation::from_rows_owned(right_rows, vec!["right".into()]);
177
178        let join = NestedLoopJoin::inner(0, 0);
179        let result = join.execute(left, right);
180
181        // Should match on keys 1 and 2
182        assert_eq!(result.len(), 2);
183    }
184
185    #[test]
186    fn test_nested_loop_join_range() {
187        let left_rows = vec![
188            Row::new(0, vec![Value::Int64(10)]),
189            Row::new(1, vec![Value::Int64(20)]),
190        ];
191        let right_rows = vec![
192            Row::new(10, vec![Value::Int64(5)]),
193            Row::new(11, vec![Value::Int64(15)]),
194            Row::new(12, vec![Value::Int64(25)]),
195        ];
196
197        let left = Relation::from_rows_owned(left_rows, vec!["left".into()]);
198        let right = Relation::from_rows_owned(right_rows, vec!["right".into()]);
199
200        let join = NestedLoopJoin::inner(0, 0);
201        // left.value > right.value
202        let result = join.execute_with_predicate(left, right, |l, r| l > r);
203
204        // 10 > 5, 20 > 5, 20 > 15 = 3 matches
205        assert_eq!(result.len(), 3);
206    }
207
208    #[test]
209    fn test_nested_loop_join_left_outer() {
210        let left_rows = vec![
211            Row::new(0, vec![Value::Int64(1)]),
212            Row::new(1, vec![Value::Int64(2)]),
213            Row::new(2, vec![Value::Int64(3)]),
214        ];
215        let right_rows = vec![
216            Row::new(10, vec![Value::Int64(1)]),
217        ];
218
219        let left = Relation::from_rows_owned(left_rows, vec!["left".into()]);
220        let right = Relation::from_rows_owned(right_rows, vec!["right".into()]);
221
222        let join = NestedLoopJoin::left_outer(0, 0);
223        let result = join.execute(left, right);
224
225        // 1 match + 2 unmatched with nulls
226        assert_eq!(result.len(), 3);
227    }
228
229    #[test]
230    fn test_nested_loop_join_function() {
231        let left = vec![(1, "A"), (2, "B"), (3, "C")];
232        let right = vec![(1, "X"), (2, "Y"), (4, "Z")];
233
234        let result = nested_loop_join(
235            &left,
236            &right,
237            |l, r| l.0 == r.0,
238            |l, r| (l.1, r.1),
239        );
240
241        assert_eq!(result.len(), 2);
242        assert!(result.contains(&("A", "X")));
243        assert!(result.contains(&("B", "Y")));
244    }
245
246    #[test]
247    fn test_nested_loop_join_with_nulls() {
248        let left_rows = vec![
249            Row::new(0, vec![Value::Int64(1)]),
250            Row::new(1, vec![Value::Null]),
251        ];
252        let right_rows = vec![
253            Row::new(10, vec![Value::Int64(1)]),
254            Row::new(11, vec![Value::Null]),
255        ];
256
257        let left = Relation::from_rows_owned(left_rows, vec!["left".into()]);
258        let right = Relation::from_rows_owned(right_rows, vec!["right".into()]);
259
260        let join = NestedLoopJoin::inner(0, 0);
261        let result = join.execute(left, right);
262
263        // NULL values should not match
264        assert_eq!(result.len(), 1);
265    }
266}