palimpsest_dataflow/operators/consolidate.rs
1//! Aggregates the weights of equal records into at most one record.
2//!
3//! As differential dataflow streams are unordered and taken to be the accumulation of all records,
4//! no semantic change happens via `consolidate`. However, there is a practical difference between
5//! a collection that aggregates down to zero records, and one that actually has no records. The
//! underlying system can more clearly see that no work must be done in the latter case, and we can
7//! drop out of, e.g. iterative computations.
8
9use timely::dataflow::Scope;
10
11use crate::consolidation::ConsolidatingContainerBuilder;
12use crate::difference::Semigroup;
13use crate::{ExchangeData, Hashable, VecCollection};
14
15use crate::lattice::Lattice;
16use crate::trace::{Batcher, Builder};
17use crate::Data;
18
19/// Methods which require data be arrangeable.
20impl<G, D, R> VecCollection<G, D, R>
21where
22 G: Scope<Timestamp: Data + Lattice>,
23 D: ExchangeData + Hashable,
24 R: Semigroup + ExchangeData,
25{
26 /// Aggregates the weights of equal records into at most one record.
27 ///
28 /// This method uses the type `D`'s `hashed()` method to partition the data. The data are
29 /// accumulated in place, each held back until their timestamp has completed.
30 ///
31 /// # Examples
32 ///
33 /// ```
34 /// use palimpsest_dataflow::input::Input;
35 ///
36 /// ::timely::example(|scope| {
37 ///
38 /// let x = scope.new_collection_from(1 .. 10u32).1;
39 ///
40 /// x.negate()
41 /// .concat(&x)
42 /// .consolidate() // <-- ensures cancellation occurs
43 /// .assert_empty();
44 /// });
45 /// ```
46 pub fn consolidate(&self) -> Self {
47 use crate::trace::implementations::{KeyBatcher, KeyBuilder, KeySpine};
48 self.consolidate_named::<KeyBatcher<_, _, _>, KeyBuilder<_, _, _>, KeySpine<_, _, _>, _>(
49 "Consolidate",
50 |key, &()| key.clone(),
51 )
52 }
53
54 /// As `consolidate` but with the ability to name the operator, specify the trace type,
55 /// and provide the function `reify` to produce owned keys and values..
56 pub fn consolidate_named<Ba, Bu, Tr, F>(&self, name: &str, reify: F) -> Self
57 where
58 Ba: Batcher<Input = Vec<((D, ()), G::Timestamp, R)>, Time = G::Timestamp> + 'static,
59 Tr: for<'a> crate::trace::Trace<Time = G::Timestamp, Diff = R> + 'static,
60 Bu: Builder<Time = Tr::Time, Input = Ba::Output, Output = Tr::Batch>,
61 F: Fn(Tr::Key<'_>, Tr::Val<'_>) -> D + 'static,
62 {
63 use crate::operators::arrange::arrangement::Arrange;
64 self.map(|k| (k, ()))
65 .arrange_named::<Ba, Bu, Tr>(name)
66 .as_collection(reify)
67 }
68
69 /// Aggregates the weights of equal records.
70 ///
71 /// Unlike `consolidate`, this method does not exchange data and does not
72 /// ensure that at most one copy of each `(data, time)` pair exists in the
73 /// results. Instead, it acts on each batch of data and collapses equivalent
74 /// `(data, time)` pairs found therein, suppressing any that accumulate to
75 /// zero.
76 ///
77 /// # Examples
78 ///
79 /// ```
80 /// use palimpsest_dataflow::input::Input;
81 ///
82 /// ::timely::example(|scope| {
83 ///
84 /// let x = scope.new_collection_from(1 .. 10u32).1;
85 ///
86 /// // nothing to assert, as no particular guarantees.
87 /// x.negate()
88 /// .concat(&x)
89 /// .consolidate_stream();
90 /// });
91 /// ```
92 pub fn consolidate_stream(&self) -> Self {
93 use crate::collection::AsCollection;
94 use timely::dataflow::channels::pact::Pipeline;
95 use timely::dataflow::operators::Operator;
96
97 self.inner
98 .unary::<ConsolidatingContainerBuilder<_>, _, _, _>(
99 Pipeline,
100 "ConsolidateStream",
101 |_cap, _info| {
102 move |input, output| {
103 input.for_each(|time, data| {
104 output
105 .session_with_builder(&time)
106 .give_iterator(data.drain(..));
107 })
108 }
109 },
110 )
111 .as_collection()
112 }
113}