rust_queries_core/
lazy_join.rs

1//! Lazy join query implementation for combining multiple collections with deferred execution.
2//!
3//! This module provides lazy join operations that return iterators instead of eagerly
4//! collecting results, enabling early termination and memory efficiency.
5
6use key_paths_core::KeyPaths;
7use std::collections::HashMap;
8
/// Builder for hash joins over two borrowed slices whose results are handed
/// back as iterators rather than as fully materialised collections.
///
/// The builder only stores the two slices; the join methods defined on it
/// (inner join, left join, inner join with an extra predicate) decide how the
/// rows are paired up.
///
/// # Type Parameters
///
/// * `'a` - How long the borrowed left and right slices live
/// * `L` - Element type of the left slice
/// * `R` - Element type of the right slice
///
/// # Example
///
/// ```ignore
/// let user_orders = LazyJoinQuery::new(&users, &orders)
///     .inner_join_lazy(
///         User::id(),
///         Order::user_id(),
///         |user, order| (user.name.clone(), order.total)
///     );
///
/// // The right side has been indexed by now, but no left row has been
/// // probed yet - that only happens while the iterator is consumed.
/// let first_5: Vec<_> = user_orders.take(5).collect();
/// ```
pub struct LazyJoinQuery<'a, L, R>
where
    L: 'static,
    R: 'static,
{
    /// Rows on the left side of the join.
    left: &'a [L],
    /// Rows on the right side of the join.
    right: &'a [R],
}
37
38impl<'a, L: 'static, R: 'static> LazyJoinQuery<'a, L, R> {
39    /// Creates a new lazy join query from two collections.
40    ///
41    /// # Arguments
42    ///
43    /// * `left` - The left collection to join
44    /// * `right` - The right collection to join
45    ///
46    /// # Example
47    ///
48    /// ```ignore
49    /// let join = LazyJoinQuery::new(&users, &orders);
50    /// ```
51    pub fn new(left: &'a [L], right: &'a [R]) -> Self {
52        Self { left, right }
53    }
54
55    /// Performs a lazy inner join between two collections.
56    ///
57    /// Returns an iterator over matching pairs. The join is evaluated lazily,
58    /// allowing for early termination and memory efficiency.
59    ///
60    /// # Arguments
61    ///
62    /// * `left_key` - Key-path to the join field in the left collection
63    /// * `right_key` - Key-path to the join field in the right collection
64    /// * `mapper` - Function to transform matching pairs into the result type
65    ///
66    /// # Example
67    ///
68    /// ```ignore
69    /// let results: Vec<_> = LazyJoinQuery::new(&users, &orders)
70    ///     .inner_join_lazy(
71    ///         User::id(),
72    ///         Order::user_id(),
73    ///         |user, order| (user.name.clone(), order.total)
74    ///     )
75    ///     .take(10)  // Early termination - only process first 10 matches
76    ///     .collect();
77    /// ```
78    pub fn inner_join_lazy<K, O, F>(
79        &self,
80        left_key: KeyPaths<L, K>,
81        right_key: KeyPaths<R, K>,
82        mapper: F,
83    ) -> impl Iterator<Item = O> + 'a
84    where
85        K: Eq + std::hash::Hash + Clone + 'static,
86        F: Fn(&'a L, &'a R) -> O + 'a,
87        O: 'a,
88    {
89        // Build index for right side for O(n) lookup
90        let mut right_index: HashMap<K, Vec<&'a R>> = HashMap::new();
91        for item in self.right.iter() {
92            if let Some(key) = right_key.get(item).cloned() {
93                right_index.entry(key).or_insert_with(Vec::new).push(item);
94            }
95        }
96
97        // Return iterator that lazily evaluates the join
98        // We need to collect matches first to avoid closure capture issues
99        self.left.iter().flat_map(move |left_item| {
100            let key_opt = left_key.get(left_item).cloned();
101            
102            if let Some(key) = key_opt {
103                if let Some(right_items) = right_index.get(&key) {
104                    // Collect matches into Vec to avoid closure capture issues
105                    let matches: Vec<O> = right_items.iter()
106                        .map(|right_item| mapper(left_item, right_item))
107                        .collect();
108                    matches.into_iter()
109                } else {
110                    // No matches - return empty iterator
111                    Vec::<O>::new().into_iter()
112                }
113            } else {
114                // No key - return empty iterator
115                Vec::<O>::new().into_iter()
116            }
117        })
118    }
119
120    /// Performs a lazy left join between two collections.
121    ///
122    /// Returns an iterator over all left items with optional right matches.
123    /// The join is evaluated lazily, allowing for early termination.
124    ///
125    /// # Arguments
126    ///
127    /// * `left_key` - Key-path to the join field in the left collection
128    /// * `right_key` - Key-path to the join field in the right collection
129    /// * `mapper` - Function to transform pairs into the result type (right item may be None)
130    ///
131    /// # Example
132    ///
133    /// ```ignore
134    /// let results: Vec<_> = LazyJoinQuery::new(&users, &orders)
135    ///     .left_join_lazy(
136    ///         User::id(),
137    ///         Order::user_id(),
138    ///         |user, order| match order {
139    ///             Some(o) => format!("{} has order {}", user.name, o.id),
140    ///             None => format!("{} has no orders", user.name),
141    ///         }
142    ///     )
143    ///     .take(5)  // Early termination
144    ///     .collect();
145    /// ```
146    pub fn left_join_lazy<K, O, F>(
147        &self,
148        left_key: KeyPaths<L, K>,
149        right_key: KeyPaths<R, K>,
150        mapper: F,
151    ) -> impl Iterator<Item = O> + 'a
152    where
153        K: Eq + std::hash::Hash + Clone + 'static,
154        F: Fn(&'a L, Option<&'a R>) -> O + 'a,
155        O: 'a,
156    {
157        // Build index for right side
158        let mut right_index: HashMap<K, Vec<&'a R>> = HashMap::new();
159        for item in self.right.iter() {
160            if let Some(key) = right_key.get(item).cloned() {
161                right_index.entry(key).or_insert_with(Vec::new).push(item);
162            }
163        }
164
165        // Return iterator that lazily evaluates the join
166        self.left.iter().flat_map(move |left_item| {
167            let key_opt = left_key.get(left_item).cloned();
168            
169            if let Some(key) = key_opt {
170                if let Some(right_items) = right_index.get(&key) {
171                    // Has matches - yield all matches
172                    let matches: Vec<O> = right_items.iter()
173                        .map(|right_item| mapper(left_item, Some(right_item)))
174                        .collect();
175                    matches.into_iter()
176                } else {
177                    // No matches - yield None
178                    vec![mapper(left_item, None)].into_iter()
179                }
180            } else {
181                // No key - yield None
182                vec![mapper(left_item, None)].into_iter()
183            }
184        })
185    }
186
187    /// Performs a lazy inner join with an additional filter predicate.
188    ///
189    /// Like `inner_join_lazy`, but only includes pairs that satisfy both the join
190    /// condition and the additional predicate.
191    ///
192    /// # Arguments
193    ///
194    /// * `left_key` - Key-path to the join field in the left collection
195    /// * `right_key` - Key-path to the join field in the right collection
196    /// * `predicate` - Additional condition that must be true for pairs to be included
197    /// * `mapper` - Function to transform matching pairs into the result type
198    ///
199    /// # Example
200    ///
201    /// ```ignore
202    /// let results: Vec<_> = LazyJoinQuery::new(&orders, &products)
203    ///     .inner_join_where_lazy(
204    ///         Order::product_id(),
205    ///         Product::id(),
206    ///         |order, _product| order.total > 100.0,
207    ///         |order, product| (product.name.clone(), order.total)
208    ///     )
209    ///     .take(10)
210    ///     .collect();
211    /// ```
212    pub fn inner_join_where_lazy<K, O, F, P>(
213        &self,
214        left_key: KeyPaths<L, K>,
215        right_key: KeyPaths<R, K>,
216        predicate: P,
217        mapper: F,
218    ) -> impl Iterator<Item = O> + 'a
219    where
220        K: Eq + std::hash::Hash + Clone + 'static,
221        F: Fn(&'a L, &'a R) -> O + 'a,
222        P: Fn(&'a L, &'a R) -> bool + 'a,
223        O: 'a,
224    {
225        // Build index for right side
226        let mut right_index: HashMap<K, Vec<&'a R>> = HashMap::new();
227        for item in self.right.iter() {
228            if let Some(key) = right_key.get(item).cloned() {
229                right_index.entry(key).or_insert_with(Vec::new).push(item);
230            }
231        }
232
233        // Return iterator that lazily evaluates the join with predicate
234        // We need to collect matches first to avoid closure capture issues
235        self.left.iter().flat_map(move |left_item| {
236            let key_opt = left_key.get(left_item).cloned();
237            
238            if let Some(key) = key_opt {
239                if let Some(right_items) = right_index.get(&key) {
240                    // Collect filtered matches into Vec to avoid closure capture issues
241                    let matches: Vec<O> = right_items.iter()
242                        .filter(|right_item| predicate(left_item, right_item))
243                        .map(|right_item| mapper(left_item, right_item))
244                        .collect();
245                    matches.into_iter()
246                } else {
247                    // No matches - return empty iterator
248                    Vec::<O>::new().into_iter()
249                }
250            } else {
251                // No key - return empty iterator
252                Vec::<O>::new().into_iter()
253            }
254        })
255    }
256}
257