differential_dataflow/operators/
consolidate.rs

//! Aggregates the weights of equal records into at most one record.
//!
//! As differential dataflow streams are unordered and taken to be the accumulation of all records,
//! no semantic change happens via `consolidate`. However, there is a practical difference between
//! a collection that aggregates down to zero records, and one that actually has no records. The
//! underlying system can more clearly see that no work must be done in the latter case, and we can
//! drop out of, e.g., iterative computations.
8
9use timely::dataflow::Scope;
10
11use crate::{Collection, ExchangeData, Hashable};
12use crate::difference::Semigroup;
13
14use crate::Data;
15use crate::lattice::Lattice;
16use crate::trace::{Batcher, Builder};
17
18/// Methods which require data be arrangeable.
19impl<G, D, R> Collection<G, D, R>
20where
21    G: Scope,
22    G::Timestamp: Data+Lattice,
23    D: ExchangeData+Hashable,
24    R: Semigroup+ExchangeData,
25{
26    /// Aggregates the weights of equal records into at most one record.
27    ///
28    /// This method uses the type `D`'s `hashed()` method to partition the data. The data are
29    /// accumulated in place, each held back until their timestamp has completed.
30    ///
31    /// # Examples
32    ///
33    /// ```
34    /// use differential_dataflow::input::Input;
35    ///
36    /// ::timely::example(|scope| {
37    ///
38    ///     let x = scope.new_collection_from(1 .. 10u32).1;
39    ///
40    ///     x.negate()
41    ///      .concat(&x)
42    ///      .consolidate() // <-- ensures cancellation occurs
43    ///      .assert_empty();
44    /// });
45    /// ```
46    pub fn consolidate(&self) -> Self {
47        use crate::trace::implementations::KeySpine;
48        self.consolidate_named::<KeySpine<_,_,_>>("Consolidate")
49    }
50
51    /// As `consolidate` but with the ability to name the operator and specify the trace type.
52    pub fn consolidate_named<Tr>(&self, name: &str) -> Self
53    where
54        Tr: crate::trace::Trace<KeyOwned = D,ValOwned = (),Time=G::Timestamp,Diff=R>+'static,
55        Tr::Batch: crate::trace::Batch,
56        Tr::Batcher: Batcher<Item = ((D,()),G::Timestamp,R), Time = G::Timestamp>,
57        Tr::Builder: Builder<Item = ((D,()),G::Timestamp,R), Time = G::Timestamp>,
58    {
59        use crate::operators::arrange::arrangement::Arrange;
60        use crate::trace::cursor::MyTrait;
61        self.map(|k| (k, ()))
62            .arrange_named::<Tr>(name)
63            .as_collection(|d, _| d.into_owned())
64    }
65
66    /// Aggregates the weights of equal records.
67    ///
68    /// Unlike `consolidate`, this method does not exchange data and does not
69    /// ensure that at most one copy of each `(data, time)` pair exists in the
70    /// results. Instead, it acts on each batch of data and collapses equivalent
71    /// `(data, time)` pairs found therein, suppressing any that accumulate to
72    /// zero.
73    ///
74    /// # Examples
75    ///
76    /// ```
77    /// use differential_dataflow::input::Input;
78    ///
79    /// ::timely::example(|scope| {
80    ///
81    ///     let x = scope.new_collection_from(1 .. 10u32).1;
82    ///
83    ///     // nothing to assert, as no particular guarantees.
84    ///     x.negate()
85    ///      .concat(&x)
86    ///      .consolidate_stream();
87    /// });
88    /// ```
89    pub fn consolidate_stream(&self) -> Self {
90
91        use timely::dataflow::channels::pact::Pipeline;
92        use timely::dataflow::operators::Operator;
93        use crate::collection::AsCollection;
94
95        self.inner
96            .unary(Pipeline, "ConsolidateStream", |_cap, _info| {
97
98                let mut vector = Vec::new();
99                move |input, output| {
100                    input.for_each(|time, data| {
101                        data.swap(&mut vector);
102                        crate::consolidation::consolidate_updates(&mut vector);
103                        output.session(&time).give_vec(&mut vector);
104                    })
105                }
106            })
107            .as_collection()
108    }
109}