Skip to main content

differential_dataflow/trace/implementations/
chainless_batcher.rs

1//! A `Batcher` implementation based on merge sort.
2
3use timely::progress::frontier::AntichainRef;
4use timely::progress::{frontier::Antichain, Timestamp};
5
6use crate::logging::Logger;
7use crate::trace;
8
9/// A type that can be used as storage within a merge batcher.
10pub trait BatcherStorage<T: Timestamp> : Default + Sized {
11    /// Number of contained updates.
12    fn len(&self) -> usize;
13    /// Merges two storage containers into one.
14    ///
15    /// This is expected to consolidate updates as it goes.
16    fn merge(self, other: Self) -> Self;
17    /// Extracts elements not greater or equal to the frontier.
18    fn split(&mut self, frontier: AntichainRef<T>) -> Self;
19    /// Ensures `frontier` is less or equal to all contained times.
20    ///
21    /// Consider merging with `split`, but needed for new stores as well.
22    fn lower(&self, frontier: &mut Antichain<T>);
23}
24
25/// A batcher that simple merges `BatcherStorage` implementors.
26pub struct Batcher<T: Timestamp, S: BatcherStorage<T>> {
27    /// Each store is at least twice the size of the next.
28    storages: Vec<S>,
29    /// The lower bound of timestamps of the maintained updates.
30    lower: Antichain<T>,
31    /// The previosly minted frontier.
32    prior: Antichain<T>,
33
34    /// Logger for size accounting.
35    _logger: Option<Logger>,
36    /// Timely operator ID.
37    _operator_id: usize,
38}
39
40impl<T: Timestamp, S: BatcherStorage<T>> Batcher<T, S> {
41    /// Ensures lists decrease in size geometrically.
42    fn tidy(&mut self) {
43        self.storages.retain(|x| x.len() > 0);
44        self.storages.sort_by_key(|x| x.len());
45        self.storages.reverse();
46        while let Some(pos) = (1..self.storages.len()).position(|i| self.storages[i-1].len() < 2 * self.storages[i].len()) {
47            while self.storages.len() > pos + 1 {
48                let x = self.storages.pop().unwrap();
49                let y = self.storages.pop().unwrap();
50                self.storages.push(x.merge(y));
51                self.storages.sort_by_key(|x| x.len());
52                self.storages.reverse();
53            }
54        }
55    }
56}
57
58impl<T: Timestamp, S: BatcherStorage<T>> trace::Batcher for Batcher<T, S> {
59    type Time = T;
60    type Input = S;
61    type Output = S;
62
63    fn new(logger: Option<Logger>, operator_id: usize) -> Self {
64        Self {
65            storages: Vec::default(),
66            lower: Default::default(),
67            prior: Antichain::from_elem(T::minimum()),
68            _logger: logger,
69            _operator_id: operator_id,
70        }
71    }
72
73    fn push_container(&mut self, batch: &mut Self::Input) {
74        if batch.len() > 0 {
75            // TODO: This appears to be optional based on `frontier` only being called after `seal`.
76            //       For the moment, the trait doesn't promise this, but keep eyes on the cost.
77            batch.lower(&mut self.lower);
78            self.storages.push(std::mem::take(batch));
79            self.tidy();
80        }
81    }
82
83    fn seal<B: trace::Builder<Input=Self::Output, Time=Self::Time>>(&mut self, upper: Antichain<Self::Time>) -> B::Output {
84        let description = trace::Description::new(self.prior.clone(), upper.clone(), Antichain::new());
85        self.prior = upper.clone();
86        if let Some(mut store) = self.storages.pop() {
87            self.lower.clear();
88            let mut ship = store.split(upper.borrow());
89            let mut keep = store;
90            while let Some(mut store) = self.storages.pop() {
91                let split = store.split(upper.borrow());
92                ship = ship.merge(split);
93                keep = keep.merge(store);
94            }
95            keep.lower(&mut self.lower);
96            self.storages.push(keep);
97            B::seal(&mut vec![ship], description)
98        }
99        else {
100            B::seal(&mut vec![], description)
101        }
102    }
103
104    fn frontier(&mut self) -> AntichainRef<'_, Self::Time> { self.lower.borrow() }
105}