zrx_stream/stream/operator/product.rs
1// Copyright (c) Zensical LLC <https://zensical.org>
2
3// SPDX-License-Identifier: MIT
4// Third-party contributions licensed under CLA
5
6// Permission is hereby granted, free of charge, to any person obtaining a copy
7// of this software and associated documentation files (the "Software"), to
8// deal in the Software without restriction, including without limitation the
9// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10// sell copies of the Software, and to permit persons to whom the Software is
11// furnished to do so, subject to the following conditions:
12
13// The above copyright notice and this permission notice shall be included in
14// all copies or substantial portions of the Software.
15
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18// FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
19// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22// IN THE SOFTWARE.
23
24// ----------------------------------------------------------------------------
25
26//! Product operator.
27
28use ahash::HashMap;
29use zrx_scheduler::action::output::IntoOutputs;
30use zrx_scheduler::action::Descriptor;
31use zrx_scheduler::effect::Item;
32use zrx_scheduler::{Id, Value};
33use zrx_store::StoreMut;
34
35use crate::stream::operator::Operator;
36use crate::stream::value::Delta;
37use crate::stream::Stream;
38
39// ----------------------------------------------------------------------------
40// Structs
41// ----------------------------------------------------------------------------
42
43/// Product operator.
44struct Product<I, T, U> {
45 /// Store of items (left).
46 this: HashMap<I, T>,
47 /// Store of items (right).
48 that: HashMap<I, U>,
49}
50
51// ----------------------------------------------------------------------------
52// Implementations
53// ----------------------------------------------------------------------------
54
55impl<I, T> Stream<I, T>
56where
57 I: Id,
58 T: Value + Clone + Eq,
59{
60 pub fn product<U>(
61 &self, stream: &Stream<I, U>,
62 ) -> Stream<I, Delta<I, (T, U)>>
63 where
64 U: Value + Clone + Eq,
65 {
66 self.workflow.add_operator(
67 [self.id, stream.id],
68 Product::<I, T, U> {
69 this: HashMap::default(),
70 that: HashMap::default(),
71 },
72 )
73 }
74}
75
76// ----------------------------------------------------------------------------
77// Trait implementations
78// ----------------------------------------------------------------------------
79
80impl<I, T, U> Operator<I, T> for Product<I, T, U>
81where
82 I: Id,
83 T: Value + Clone + Eq,
84 U: Value + Clone + Eq,
85{
86 type Item<'a> = Item<&'a I, (Option<&'a T>, Option<&'a U>)>;
87
88 /// Handles the given item.
89 ///
90 /// Computing the cartesian product of two streams is a stateful operation
91 /// that requires maintaining internal stores for both input streams. When
92 /// an item is received from either stream, we'll update the corresponding
93 /// store and compute the product with all items in the other store. This
94 /// ensures that all combinations of items from both streams are emitted
95 /// differentially.
96 ///
97 /// This implementation assumes that the streams are synchronized, which is
98 /// given if both input streams either belong to the same frontier, or if
99 /// the scheduler ensures that both streams are backed by stores that are
100 /// synchronized. If this assumption does not hold, this operator may yield
101 /// duplicate emissions, as there's not way for the operator to know whether
102 /// absence of one of the input streams is due to a deletion, or because the
103 /// item has not yet arrived. We deem this acceptable for now, as it only
104 /// affects the efficiency of the operator, but not its correctness.
105 #[cfg_attr(
106 feature = "tracing",
107 tracing::instrument(level = "debug" skip_all, fields(id = %item.id))
108 )]
109 fn handle(&mut self, item: Self::Item<'_>) -> impl IntoOutputs<I> {
110 let mut items = Vec::new();
111
112 // The left (this) and right (that) values are optional, indicating
113 // whether there is an insertion or a deletion. Note that both values
114 // can be present or absent at the same time, which can indicate a
115 // self-join, but does not necessarily have to.
116 let (this, that) = item.data;
117 if let Some(data) = this {
118 // If the left item value is present, update the left store, and if
119 // it changed, compute the product of the item and the right store
120 if self.this.insert_if_changed(item.id, data)
121 && !self.that.is_empty()
122 {
123 let iter = self.that.iter().map(|(id, that)| {
124 Item::new(id.clone(), Some((data.clone(), that.clone())))
125 });
126 items.push(Item::new(
127 item.id.clone(),
128 Some(iter.collect::<Delta<_, _>>()),
129 ));
130 }
131 } else if that.is_none() && self.this.remove(item.id).is_some() {
132 // An item was present and was successfully removed, so we compute
133 // the product of the removed item and the right store
134 let iter = self.that.keys().map(|id| Item::new(id.clone(), None));
135 items.push(Item::new(
136 item.id.clone(),
137 Some(iter.collect::<Delta<_, _>>()),
138 ));
139 }
140
141 // After processing the left value, we do the same for the right value
142 // by updating the right store - basically the other way round
143 if let Some(data) = that {
144 // If the right item value is present, update the right store, and
145 // if it changed, compute the product of the left store and the item
146 if self.that.insert_if_changed(item.id, data) {
147 items.extend(self.this.iter().map(|(id, this)| {
148 Item::new(
149 id.clone(),
150 Some(Delta::from([Item::new(
151 item.id.clone(),
152 Some((this.clone(), data.clone())),
153 )])),
154 )
155 }));
156 }
157 } else if this.is_none() && self.that.remove(item.id).is_some() {
158 // An item was present, and could be removed, so we compute the
159 // product of the removed item and all items in the left store
160 items.extend(self.this.keys().map(|id| {
161 let inner = Item::new(item.id.clone(), None);
162 Item::new(id.clone(), Some(Delta::from([inner])))
163 }));
164 }
165
166 // Return deltas of items
167 items
168 }
169
170 /// Returns the descriptor.
171 #[inline]
172 fn descriptor(&self) -> Descriptor {
173 Descriptor::default()
174 }
175}