zrx_stream/stream/operator/
product.rs

1// Copyright (c) Zensical LLC <https://zensical.org>
2
3// SPDX-License-Identifier: MIT
4// Third-party contributions licensed under CLA
5
6// Permission is hereby granted, free of charge, to any person obtaining a copy
7// of this software and associated documentation files (the "Software"), to
8// deal in the Software without restriction, including without limitation the
9// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10// sell copies of the Software, and to permit persons to whom the Software is
11// furnished to do so, subject to the following conditions:
12
13// The above copyright notice and this permission notice shall be included in
14// all copies or substantial portions of the Software.
15
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18// FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
19// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22// IN THE SOFTWARE.
23
24// ----------------------------------------------------------------------------
25
26//! Product operator.
27
28use ahash::HashMap;
29use zrx_scheduler::action::output::IntoOutputs;
30use zrx_scheduler::action::Descriptor;
31use zrx_scheduler::effect::Item;
32use zrx_scheduler::{Id, Value};
33use zrx_store::StoreMut;
34
35use crate::stream::operator::Operator;
36use crate::stream::value::Delta;
37use crate::stream::Stream;
38
39// ----------------------------------------------------------------------------
40// Structs
41// ----------------------------------------------------------------------------
42
43/// Product operator.
44struct Product<I, T, U> {
45    /// Store of items (left).
46    this: HashMap<I, T>,
47    /// Store of items (right).
48    that: HashMap<I, U>,
49}
50
51// ----------------------------------------------------------------------------
52// Implementations
53// ----------------------------------------------------------------------------
54
55impl<I, T> Stream<I, T>
56where
57    I: Id,
58    T: Value + Clone + Eq,
59{
60    pub fn product<U>(
61        &self, stream: &Stream<I, U>,
62    ) -> Stream<I, Delta<I, (T, U)>>
63    where
64        U: Value + Clone + Eq,
65    {
66        self.workflow.add_operator(
67            [self.id, stream.id],
68            Product::<I, T, U> {
69                this: HashMap::default(),
70                that: HashMap::default(),
71            },
72        )
73    }
74}
75
76// ----------------------------------------------------------------------------
77// Trait implementations
78// ----------------------------------------------------------------------------
79
80impl<I, T, U> Operator<I, T> for Product<I, T, U>
81where
82    I: Id,
83    T: Value + Clone + Eq,
84    U: Value + Clone + Eq,
85{
86    type Item<'a> = Item<&'a I, (Option<&'a T>, Option<&'a U>)>;
87
88    /// Handles the given item.
89    ///
90    /// Computing the cartesian product of two streams is a stateful operation
91    /// that requires maintaining internal stores for both input streams. When
92    /// an item is received from either stream, we'll update the corresponding
93    /// store and compute the product with all items in the other store. This
94    /// ensures that all combinations of items from both streams are emitted
95    /// differentially.
96    ///
97    /// This implementation assumes that the streams are synchronized, which is
98    /// given if both input streams either belong to the same frontier, or if
99    /// the scheduler ensures that both streams are backed by stores that are
100    /// synchronized. If this assumption does not hold, this operator may yield
101    /// duplicate emissions, as there's not way for the operator to know whether
102    /// absence of one of the input streams is due to a deletion, or because the
103    /// item has not yet arrived. We deem this acceptable for now, as it only
104    /// affects the efficiency of the operator, but not its correctness.
105    #[cfg_attr(
106        feature = "tracing",
107        tracing::instrument(level = "debug" skip_all, fields(id = %item.id))
108    )]
109    fn handle(&mut self, item: Self::Item<'_>) -> impl IntoOutputs<I> {
110        let mut items = Vec::new();
111
112        // The left (this) and right (that) values are optional, indicating
113        // whether there is an insertion or a deletion. Note that both values
114        // can be present or absent at the same time, which can indicate a
115        // self-join, but does not necessarily have to.
116        let (this, that) = item.data;
117        if let Some(data) = this {
118            // If the left item value is present, update the left store, and if
119            // it changed, compute the product of the item and the right store
120            if self.this.insert_if_changed(item.id, data)
121                && !self.that.is_empty()
122            {
123                let iter = self.that.iter().map(|(id, that)| {
124                    Item::new(id.clone(), Some((data.clone(), that.clone())))
125                });
126                items.push(Item::new(
127                    item.id.clone(),
128                    Some(iter.collect::<Delta<_, _>>()),
129                ));
130            }
131        } else if that.is_none() && self.this.remove(item.id).is_some() {
132            // An item was present and was successfully removed, so we compute
133            // the product of the removed item and the right store
134            let iter = self.that.keys().map(|id| Item::new(id.clone(), None));
135            items.push(Item::new(
136                item.id.clone(),
137                Some(iter.collect::<Delta<_, _>>()),
138            ));
139        }
140
141        // After processing the left value, we do the same for the right value
142        // by updating the right store - basically the other way round
143        if let Some(data) = that {
144            // If the right item value is present, update the right store, and
145            // if it changed, compute the product of the left store and the item
146            if self.that.insert_if_changed(item.id, data) {
147                items.extend(self.this.iter().map(|(id, this)| {
148                    Item::new(
149                        id.clone(),
150                        Some(Delta::from([Item::new(
151                            item.id.clone(),
152                            Some((this.clone(), data.clone())),
153                        )])),
154                    )
155                }));
156            }
157        } else if this.is_none() && self.that.remove(item.id).is_some() {
158            // An item was present, and could be removed, so we compute the
159            // product of the removed item and all items in the left store
160            items.extend(self.this.keys().map(|id| {
161                let inner = Item::new(item.id.clone(), None);
162                Item::new(id.clone(), Some(Delta::from([inner])))
163            }));
164        }
165
166        // Return deltas of items
167        items
168    }
169
170    /// Returns the descriptor.
171    #[inline]
172    fn descriptor(&self) -> Descriptor {
173        Descriptor::default()
174    }
175}