timely/dataflow/operators/
capability.rs

1//! Capabilities to send data from operators
2//!
3//! Timely dataflow operators are only able to send data if they possess a "capability",
4//! a system-created object which warns the runtime that the operator may still produce
5//! output records.
6//!
7//! The timely dataflow runtime creates a capability and provides it to an operator whenever
8//! the operator receives input data. The capabilities allow the operator to respond to the
9//! received data, immediately or in the future, for as long as the capability is held.
10//!
11//! Timely dataflow's progress tracking infrastructure communicates the number of outstanding
12//! capabilities across all workers.
13//! Each operator may hold on to its capabilities, and may clone, advance, and drop them.
14//! Each of these actions informs the timely dataflow runtime of changes to the number of outstanding
15//! capabilities, so that the runtime can notice when the count for some capability reaches zero.
16//! While an operator can hold capabilities indefinitely, and create as many new copies of them
17//! as it would like, the progress tracking infrastructure will not move forward until the
18//! operators eventually release their capabilities.
19//!
20//! Note that these capabilities currently lack the property of "transferability":
21//! An operator should not hand its capabilities to some other operator. In the future, we should
22//! probably bind capabilities more strongly to a specific operator and output.
23
24use std::{borrow, error::Error, fmt::Display, ops::Deref};
25use std::rc::Rc;
26use std::cell::RefCell;
27use std::fmt::{self, Debug};
28
29use crate::order::PartialOrder;
30use crate::progress::Antichain;
31use crate::progress::Timestamp;
32use crate::progress::ChangeBatch;
33use crate::scheduling::Activations;
34use crate::dataflow::channels::pullers::counter::ConsumedGuard;
35
36/// An internal trait expressing the capability to send messages with a given timestamp.
37pub trait CapabilityTrait<T: Timestamp> {
38    /// The timestamp associated with the capability.
39    fn time(&self) -> &T;
40    fn valid_for_output(&self, query_buffer: &Rc<RefCell<ChangeBatch<T>>>) -> bool;
41}
42
43impl<'a, T: Timestamp, C: CapabilityTrait<T>> CapabilityTrait<T> for &'a C {
44    fn time(&self) -> &T { (**self).time() }
45    fn valid_for_output(&self, query_buffer: &Rc<RefCell<ChangeBatch<T>>>) -> bool {
46        (**self).valid_for_output(query_buffer)
47    }
48}
49impl<'a, T: Timestamp, C: CapabilityTrait<T>> CapabilityTrait<T> for &'a mut C {
50    fn time(&self) -> &T { (**self).time() }
51    fn valid_for_output(&self, query_buffer: &Rc<RefCell<ChangeBatch<T>>>) -> bool {
52        (**self).valid_for_output(query_buffer)
53    }
54}
55
56/// The capability to send data with a certain timestamp on a dataflow edge.
57///
58/// Capabilities are used by timely dataflow's progress tracking machinery to restrict and track
59/// when user code retains the ability to send messages on dataflow edges. All capabilities are
60/// constructed by the system, and should eventually be dropped by the user. Failure to drop
61/// a capability (for whatever reason) will cause timely dataflow's progress tracking to stall.
62pub struct Capability<T: Timestamp> {
63    time: T,
64    internal: Rc<RefCell<ChangeBatch<T>>>,
65}
66
67impl<T: Timestamp> CapabilityTrait<T> for Capability<T> {
68    fn time(&self) -> &T { &self.time }
69    fn valid_for_output(&self, query_buffer: &Rc<RefCell<ChangeBatch<T>>>) -> bool {
70        Rc::ptr_eq(&self.internal, query_buffer)
71    }
72}
73
74impl<T: Timestamp> Capability<T> {
75    /// Creates a new capability at `time` while incrementing (and keeping a reference to) the provided
76    /// [`ChangeBatch`].
77    pub(crate) fn new(time: T, internal: Rc<RefCell<ChangeBatch<T>>>) -> Self {
78        internal.borrow_mut().update(time.clone(), 1);
79
80        Self {
81            time,
82            internal,
83        }
84    }
85
86    /// The timestamp associated with this capability.
87    pub fn time(&self) -> &T {
88        &self.time
89    }
90
91    /// Makes a new capability for a timestamp `new_time` greater or equal to the timestamp of
92    /// the source capability (`self`).
93    ///
94    /// This method panics if `self.time` is not less or equal to `new_time`.
95    pub fn delayed(&self, new_time: &T) -> Capability<T> {
96        /// Makes the panic branch cold & outlined to decrease code bloat & give
97        /// the inner function the best chance possible of being inlined with
98        /// minimal code bloat
99        #[cold]
100        #[inline(never)]
101        fn delayed_panic(capability: &dyn Debug, invalid_time: &dyn Debug) -> ! {
102            // Formatting & panic machinery is relatively expensive in terms of code bloat, so
103            // we outline it
104            panic!(
105                "Attempted to delay {:?} to {:?}, which is not beyond the capability's time.",
106                capability,
107                invalid_time,
108            )
109        }
110
111        self.try_delayed(new_time)
112            .unwrap_or_else(|| delayed_panic(self, new_time))
113    }
114
115    /// Attempts to make a new capability for a timestamp `new_time` that is
116    /// greater or equal to the timestamp of the source capability (`self`).
117    ///
118    /// Returns [`None`] `self.time` is not less or equal to `new_time`.
119    pub fn try_delayed(&self, new_time: &T) -> Option<Capability<T>> {
120        if self.time.less_equal(new_time) {
121            Some(Self::new(new_time.clone(), self.internal.clone()))
122        } else {
123            None
124        }
125    }
126
127    /// Downgrades the capability to one corresponding to `new_time`.
128    ///
129    /// This method panics if `self.time` is not less or equal to `new_time`.
130    pub fn downgrade(&mut self, new_time: &T) {
131        /// Makes the panic branch cold & outlined to decrease code bloat & give
132        /// the inner function the best chance possible of being inlined with
133        /// minimal code bloat
134        #[cold]
135        #[inline(never)]
136        fn downgrade_panic(capability: &dyn Debug, invalid_time: &dyn Debug) -> ! {
137            // Formatting & panic machinery is relatively expensive in terms of code bloat, so
138            // we outline it
139            panic!(
140                "Attempted to downgrade {:?} to {:?}, which is not beyond the capability's time.",
141                capability,
142                invalid_time,
143            )
144        }
145
146        self.try_downgrade(new_time)
147            .unwrap_or_else(|_| downgrade_panic(self, new_time))
148    }
149
150    /// Attempts to downgrade the capability to one corresponding to `new_time`.
151    ///
152    /// Returns a [DowngradeError] if `self.time` is not less or equal to `new_time`.
153    pub fn try_downgrade(&mut self, new_time: &T) -> Result<(), DowngradeError> {
154        if let Some(new_capability) = self.try_delayed(new_time) {
155            *self = new_capability;
156            Ok(())
157        } else {
158            Err(DowngradeError(()))
159        }
160    }
161}
162
163// Necessary for correctness. When a capability is dropped, the "internal" `ChangeBatch` needs to be
164// updated accordingly to inform the rest of the system that the operator has released its permit
165// to send data and request notification at the associated timestamp.
166impl<T: Timestamp> Drop for Capability<T> {
167    fn drop(&mut self) {
168        self.internal.borrow_mut().update(self.time.clone(), -1);
169    }
170}
171
172impl<T: Timestamp> Clone for Capability<T> {
173    fn clone(&self) -> Capability<T> {
174        Self::new(self.time.clone(), self.internal.clone())
175    }
176}
177
178impl<T: Timestamp> Deref for Capability<T> {
179    type Target = T;
180
181    fn deref(&self) -> &T {
182        &self.time
183    }
184}
185
186impl<T: Timestamp> Debug for Capability<T> {
187    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
188        f.debug_struct("Capability")
189            .field("time", &self.time)
190            .field("internal", &"...")
191            .finish()
192    }
193}
194
195impl<T: Timestamp> PartialEq for Capability<T> {
196    fn eq(&self, other: &Self) -> bool {
197        self.time() == other.time() && Rc::ptr_eq(&self.internal, &other.internal)
198    }
199}
200impl<T: Timestamp> Eq for Capability<T> { }
201
202impl<T: Timestamp> PartialOrder for Capability<T> {
203    fn less_equal(&self, other: &Self) -> bool {
204        self.time().less_equal(other.time()) && Rc::ptr_eq(&self.internal, &other.internal)
205    }
206}
207
208impl<T: Timestamp> ::std::hash::Hash for Capability<T> {
209    fn hash<H: ::std::hash::Hasher>(&self, state: &mut H) {
210        self.time.hash(state);
211    }
212}
213
/// The error returned when a capability cannot be downgraded because its current time is
/// not less than or equal to the requested time.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DowngradeError(());

impl Display for DowngradeError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const MESSAGE: &str = "could not downgrade the given capability";
        f.write_str(MESSAGE)
    }
}

impl Error for DowngradeError {}
226
227/// A shared list of shared output capability buffers.
228type CapabilityUpdates<T> = Rc<RefCell<Vec<Rc<RefCell<ChangeBatch<T>>>>>>;
229
230/// An capability of an input port. Holding onto this capability will implicitly holds onto a
231/// capability for all the outputs ports this input is connected to, after the connection summaries
232/// have been applied.
233///
234/// This input capability supplies a `retain_for_output(self)` method which consumes the input
235/// capability and turns it into a [Capability] for a specific output port.
236pub struct InputCapability<T: Timestamp> {
237    /// Output capability buffers, for use in minting capabilities.
238    internal: CapabilityUpdates<T>,
239    /// Timestamp summaries for each output.
240    summaries: Rc<RefCell<Vec<Antichain<T::Summary>>>>,
241    /// A drop guard that updates the consumed capability this InputCapability refers to on drop
242    consumed_guard: ConsumedGuard<T>,
243}
244
245impl<T: Timestamp> CapabilityTrait<T> for InputCapability<T> {
246    fn time(&self) -> &T { self.time() }
247    fn valid_for_output(&self, query_buffer: &Rc<RefCell<ChangeBatch<T>>>) -> bool {
248        let borrow = self.summaries.borrow();
249        self.internal.borrow().iter().enumerate().any(|(index, rc)| {
250            // To be valid, the output buffer must match and the timestamp summary needs to be the default.
251            Rc::ptr_eq(rc, query_buffer) && borrow[index].len() == 1 && borrow[index][0] == Default::default()
252        })
253    }
254}
255
256impl<T: Timestamp> InputCapability<T> {
257    /// Creates a new capability reference at `time` while incrementing (and keeping a reference to)
258    /// the provided [`ChangeBatch`].
259    pub(crate) fn new(internal: CapabilityUpdates<T>, summaries: Rc<RefCell<Vec<Antichain<T::Summary>>>>, guard: ConsumedGuard<T>) -> Self {
260        InputCapability {
261            internal,
262            summaries,
263            consumed_guard: guard,
264        }
265    }
266
267    /// The timestamp associated with this capability.
268    pub fn time(&self) -> &T {
269        self.consumed_guard.time()
270    }
271
272    /// Makes a new capability for a timestamp `new_time` greater or equal to the timestamp of
273    /// the source capability (`self`).
274    ///
275    /// This method panics if `self.time` is not less or equal to `new_time`.
276    pub fn delayed(&self, new_time: &T) -> Capability<T> {
277        self.delayed_for_output(new_time, 0)
278    }
279
280    /// Delays capability for a specific output port.
281    pub fn delayed_for_output(&self, new_time: &T, output_port: usize) -> Capability<T> {
282        use crate::progress::timestamp::PathSummary;
283        if self.summaries.borrow()[output_port].iter().flat_map(|summary| summary.results_in(self.time())).any(|time| time.less_equal(new_time)) {
284            Capability::new(new_time.clone(), self.internal.borrow()[output_port].clone())
285        } else {
286            panic!("Attempted to delay to a time ({:?}) not greater or equal to the operators input-output summary ({:?}) applied to the capabilities time ({:?})", new_time, self.summaries.borrow()[output_port], self.time());
287        }
288    }
289
290    /// Transform to an owned capability.
291    ///
292    /// This method produces an owned capability which must be dropped to release the
293    /// capability. Users should take care that these capabilities are only stored for
294    /// as long as they are required, as failing to drop them may result in livelock.
295    ///
296    /// This method panics if the timestamp summary to output zero strictly advances the time.
297    pub fn retain(self) -> Capability<T> {
298        self.retain_for_output(0)
299    }
300
301    /// Transforms to an owned capability for a specific output port.
302    ///
303    /// This method panics if the timestamp summary to `output_port` strictly advances the time.
304    pub fn retain_for_output(self, output_port: usize) -> Capability<T> {
305        use crate::progress::timestamp::PathSummary;
306        let self_time = self.time().clone();
307        if self.summaries.borrow()[output_port].iter().flat_map(|summary| summary.results_in(&self_time)).any(|time| time.less_equal(&self_time)) {
308            Capability::new(self_time, self.internal.borrow()[output_port].clone())
309        }
310        else {
311            panic!("Attempted to retain a time ({:?}) not greater or equal to the operators input-output summary ({:?}) applied to the capabilities time ({:?})", self_time, self.summaries.borrow()[output_port], self_time);
312        }
313    }
314}
315
316impl<T: Timestamp> Deref for InputCapability<T> {
317    type Target = T;
318
319    fn deref(&self) -> &T {
320        self.time()
321    }
322}
323
324impl<T: Timestamp> Debug for InputCapability<T> {
325    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
326        f.debug_struct("InputCapability")
327            .field("time", self.time())
328            .field("internal", &"...")
329            .finish()
330    }
331}
332
333/// Capability that activates on drop.
334#[derive(Clone, Debug)]
335pub struct ActivateCapability<T: Timestamp> {
336    pub(crate) capability: Capability<T>,
337    pub(crate) address: Rc<Vec<usize>>,
338    pub(crate) activations: Rc<RefCell<Activations>>,
339}
340
341impl<T: Timestamp> CapabilityTrait<T> for ActivateCapability<T> {
342    fn time(&self) -> &T { self.capability.time() }
343    fn valid_for_output(&self, query_buffer: &Rc<RefCell<ChangeBatch<T>>>) -> bool {
344        self.capability.valid_for_output(query_buffer)
345    }
346}
347
348impl<T: Timestamp> ActivateCapability<T> {
349    /// Creates a new activating capability.
350    pub fn new(capability: Capability<T>, address: &[usize], activations: Rc<RefCell<Activations>>) -> Self {
351        Self {
352            capability,
353            address: Rc::new(address.to_vec()),
354            activations,
355        }
356    }
357
358    /// The timestamp associated with this capability.
359    pub fn time(&self) -> &T {
360        self.capability.time()
361    }
362
363    /// Creates a new delayed capability.
364    pub fn delayed(&self, time: &T) -> Self {
365        ActivateCapability {
366            capability: self.capability.delayed(time),
367            address: self.address.clone(),
368            activations: self.activations.clone(),
369        }
370    }
371
372    /// Downgrades this capability.
373    pub fn downgrade(&mut self, time: &T) {
374        self.capability.downgrade(time);
375        self.activations.borrow_mut().activate(&self.address);
376    }
377}
378
379impl<T: Timestamp> Drop for ActivateCapability<T> {
380    fn drop(&mut self) {
381        self.activations.borrow_mut().activate(&self.address);
382    }
383}
384
385/// A set of capabilities, for possibly incomparable times.
386#[derive(Clone, Debug)]
387pub struct CapabilitySet<T: Timestamp> {
388    elements: Vec<Capability<T>>,
389}
390
391impl<T: Timestamp> CapabilitySet<T> {
392
393    /// Allocates an empty capability set.
394    pub fn new() -> Self {
395        Self { elements: Vec::new() }
396    }
397
398    /// Allocates an empty capability set with space for `capacity` elements
399    pub fn with_capacity(capacity: usize) -> Self {
400        Self { elements: Vec::with_capacity(capacity) }
401    }
402
403    /// Allocates a capability set containing a single capability.
404    ///
405    /// # Examples
406    /// ```
407    /// use std::collections::HashMap;
408    /// use timely::dataflow::{
409    ///     operators::{ToStream, generic::Operator},
410    ///     channels::pact::Pipeline,
411    /// };
412    /// use timely::dataflow::operators::CapabilitySet;
413    ///
414    /// timely::example(|scope| {
415    ///     vec![()].into_iter().to_stream(scope)
416    ///         .unary_frontier(Pipeline, "example", |default_cap, _info| {
417    ///             let mut cap = CapabilitySet::from_elem(default_cap);
418    ///             let mut vector = Vec::new();
419    ///             move |input, output| {
420    ///                 cap.downgrade(&input.frontier().frontier());
421    ///                 while let Some((time, data)) = input.next() {
422    ///                     data.swap(&mut vector);
423    ///                 }
424    ///                 let a_cap = cap.first();
425    ///                 if let Some(a_cap) = a_cap.as_ref() {
426    ///                     output.session(a_cap).give(());
427    ///                 }
428    ///             }
429    ///         });
430    /// });
431    /// ```
432    pub fn from_elem(cap: Capability<T>) -> Self {
433        Self { elements: vec![cap] }
434    }
435
436    /// Inserts `capability` into the set, discarding redundant capabilities.
437    pub fn insert(&mut self, capability: Capability<T>) {
438        if !self.elements.iter().any(|c| c.less_equal(&capability)) {
439            self.elements.retain(|c| !capability.less_equal(c));
440            self.elements.push(capability);
441        }
442    }
443
444    /// Creates a new capability to send data at `time`.
445    ///
446    /// This method panics if there does not exist a capability in `self.elements` less or equal to `time`.
447    pub fn delayed(&self, time: &T) -> Capability<T> {
448        /// Makes the panic branch cold & outlined to decrease code bloat & give
449        /// the inner function the best chance possible of being inlined with
450        /// minimal code bloat
451        #[cold]
452        #[inline(never)]
453        fn delayed_panic(invalid_time: &dyn Debug) -> ! {
454            // Formatting & panic machinery is relatively expensive in terms of code bloat, so
455            // we outline it
456            panic!(
457                "failed to create a delayed capability, the current set does not \
458                have an element less than or equal to {:?}",
459                invalid_time,
460            )
461        }
462
463        self.try_delayed(time)
464            .unwrap_or_else(|| delayed_panic(time))
465    }
466
467    /// Attempts to create a new capability to send data at `time`.
468    ///
469    /// Returns [`None`] if there does not exist a capability in `self.elements` less or equal to `time`.
470    pub fn try_delayed(&self, time: &T) -> Option<Capability<T>> {
471        self.elements
472            .iter()
473            .find(|capability| capability.time().less_equal(time))
474            .and_then(|capability| capability.try_delayed(time))
475    }
476
477    /// Downgrades the set of capabilities to correspond with the times in `frontier`.
478    ///
479    /// This method panics if any element of `frontier` is not greater or equal to some element of `self.elements`.
480    pub fn downgrade<B, F>(&mut self, frontier: F)
481    where
482        B: borrow::Borrow<T>,
483        F: IntoIterator<Item = B>,
484    {
485        /// Makes the panic branch cold & outlined to decrease code bloat & give
486        /// the inner function the best chance possible of being inlined with
487        /// minimal code bloat
488        #[cold]
489        #[inline(never)]
490        fn downgrade_panic() -> ! {
491            // Formatting & panic machinery is relatively expensive in terms of code bloat, so
492            // we outline it
493            panic!(
494                "Attempted to downgrade a CapabilitySet with a frontier containing an element \
495                that was not beyond an element within the set"
496            )
497        }
498
499        self.try_downgrade(frontier)
500            .unwrap_or_else(|_| downgrade_panic())
501    }
502
503    /// Attempts to downgrade the set of capabilities to correspond with the times in `frontier`.
504    ///
505    /// Returns [`None`] if any element of `frontier` is not greater or equal to some element of `self.elements`.
506    ///
507    /// **Warning**: If an error is returned the capability set may be in an inconsistent state and can easily
508    /// cause logic errors within the program if not properly handled.
509    ///
510    pub fn try_downgrade<B, F>(&mut self, frontier: F) -> Result<(), DowngradeError>
511    where
512        B: borrow::Borrow<T>,
513        F: IntoIterator<Item = B>,
514    {
515        let count = self.elements.len();
516        for time in frontier.into_iter() {
517            let capability = self.try_delayed(time.borrow()).ok_or(DowngradeError(()))?;
518            self.elements.push(capability);
519        }
520        self.elements.drain(..count);
521
522        Ok(())
523    }
524}
525
526impl<T> From<Vec<Capability<T>>> for CapabilitySet<T>
527where
528    T: Timestamp,
529{
530    fn from(capabilities: Vec<Capability<T>>) -> Self {
531        let mut this = Self::with_capacity(capabilities.len());
532        for capability in capabilities {
533            this.insert(capability);
534        }
535
536        this
537    }
538}
539
540impl<T: Timestamp> Default for CapabilitySet<T> {
541    fn default() -> Self {
542        Self::new()
543    }
544}
545
546impl<T: Timestamp> Deref for CapabilitySet<T> {
547    type Target=[Capability<T>];
548
549    fn deref(&self) -> &[Capability<T>] {
550        &self.elements
551    }
552}