pub struct NetworkShuffleExec { /* private fields */ }
Expand description
ExecutionPlan implementation that shuffles data across the network in a distributed context.
The easiest way of thinking about this node is as a plain RepartitionExec node that is capable of fanning out the different produced partitions to different tasks. This allows redistributing data across different tasks in different stages, so that different physical machines can make progress on different non-overlapping sets of data.
This node allows fanning out of data from N tasks to M tasks, with N and M being arbitrary positive integers. Here are some examples of how data can be shuffled in different scenarios:
§1 to many
┌───────────────────────────┐ ┌───────────────────────────┐ ┌───────────────────────────┐ ■
│ NetworkShuffleExec │ │ NetworkShuffleExec │ │ NetworkShuffleExec │ │
│ (task 1) │ │ (task 2) │ │ (task 3) │ │
└┬─┬┬─┬┬─┬──────────────────┘ └─────────┬─┬┬─┬┬─┬─────────┘ └──────────────────┬─┬┬─┬┬─┬┘ Stage N+1
│1││2││3│ │4││5││6│ │7││8││9│ │
└─┘└─┘└─┘ └─┘└─┘└─┘ └─┘└─┘└─┘ │
▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ■
└──┴──┴────────────────────────┬──┬──┐ │ │ │ ┌──┬──┬───────────────────────┴──┴──┘
│ │ │ │ │ │ │ │ │ ■
┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐ │
│1││2││3││4││5││6││7││8││9│ │
┌┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┐ Stage N
│ RepartitionExec │ │
│ (task 1) │ │
└───────────────────────────┘ ■
§many to 1
┌───────────────────────────┐ ■
│ NetworkShuffleExec │ │
│ (task 1) │ │
└┬─┬┬─┬┬─┬┬─┬┬─┬┬─┬┬─┬┬─┬┬─┬┘ Stage N+1
│1││2││3││4││5││6││7││8││9│ │
└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘ │
▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲▲ ■
┌──┬──┬──┬──┬──┬──┬──┬──┬─────┴┼┴┴┼┴┴┼┴┴┼┴┴┼┴┴┼┴┴┼┴┴┼┴┴┼┴────┬──┬──┬──┬──┬──┬──┬──┬──┐
│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ■
┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐ ┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐ ┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐ │
│1││2││3││4││5││6││7││8││9│ │1││2││3││4││5││6││7││8││9│ │1││2││3││4││5││6││7││8││9│ │
┌┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┐ ┌┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┐ ┌┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┐ Stage N
│ RepartitionExec │ │ RepartitionExec │ │ RepartitionExec │ │
│ (task 1) │ │ (task 2) │ │ (task 3) │ │
└───────────────────────────┘ └───────────────────────────┘ └───────────────────────────┘ ■
§many to many
┌───────────────────────────┐ ┌───────────────────────────┐ ■
│ NetworkShuffleExec │ │ NetworkShuffleExec │ │
│ (task 1) │ │ (task 2) │ │
└┬─┬┬─┬┬─┬┬─┬───────────────┘ └───────────────┬─┬┬─┬┬─┬┬─┬┘ Stage N+1
│1││2││3││4│ │5││6││7││8│ │
└─┘└─┘└─┘└─┘ └─┘└─┘└─┘└─┘ │
▲▲▲▲▲▲▲▲▲▲▲▲ ▲▲▲▲▲▲▲▲▲▲▲▲ ■
┌──┬──┬──┬──┬──┬┴┴┼┴┴┼┴┴┴┴┴┴───┬──┬──┬──┬──┬──┬──┬──┬────────┬┴┴┼┴┴┼┴┴┼┴┴┼──┬──┬──┐
│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ ■
┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐ ┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐ ┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐ │
│1││2││3││4││5││6││7││8│ │1││2││3││4││5││6││7││8│ │1││2││3││4││5││6││7││8│ │
┌──┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴─┐ ┌──┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴─┐ ┌──┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴┴─┴─┐ Stage N
│ RepartitionExec │ │ RepartitionExec │ │ RepartitionExec │ │
│ (task 1) │ │ (task 2) │ │ (task 3) │ │
└───────────────────────────┘ └───────────────────────────┘ └───────────────────────────┘ ■
The communication between two stages across a NetworkShuffleExec has two implications:
- Each task in Stage N+1 gathers data from all tasks in Stage N
- The total number of partitions across all tasks in Stage N+1 is equal to the number of partitions in a single task in Stage N (e.g. (1,2,3,4) + (5,6,7,8) = (1,2,3,4,5,6,7,8)).
This node has two variants.
- Pending: acts as a placeholder for the distributed optimization step to mark it as ready.
- Ready: runs within a distributed stage and queries the next input stage over the network using Arrow Flight.
Implementations§
Source§impl NetworkShuffleExec
impl NetworkShuffleExec
Sourcepub fn try_new(
input: Arc<dyn ExecutionPlan>,
producer_tasks: usize,
) -> Result<Self>
pub fn try_new( input: Arc<dyn ExecutionPlan>, producer_tasks: usize, ) -> Result<Self>
Creates a new NetworkShuffleExec fed by the provided RepartitionExec. The input plan
will be executed in a remote worker in producer_tasks number of tasks.
Trait Implementations§
Source§impl Clone for NetworkShuffleExec
impl Clone for NetworkShuffleExec
Source§fn clone(&self) -> NetworkShuffleExec
fn clone(&self) -> NetworkShuffleExec
1.0.0 (const: unstable) · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for NetworkShuffleExec
impl Debug for NetworkShuffleExec
Source§impl DisplayAs for NetworkShuffleExec
impl DisplayAs for NetworkShuffleExec
Source§impl ExecutionPlan for NetworkShuffleExec
impl ExecutionPlan for NetworkShuffleExec
Source§fn properties(&self) -> &Arc<PlanProperties> ⓘ
fn properties(&self) -> &Arc<PlanProperties> ⓘ
Return properties of the output of the ExecutionPlan, such as output
ordering(s), partitioning information etc. Read more
Source§fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>>
fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>>
Get a list of children ExecutionPlans that act as inputs to this plan.
The returned list will be empty for leaf nodes such as scans, will contain
a single value for unary nodes, or two values for binary nodes (such as
joins).
Source§fn with_new_children(
self: Arc<Self>,
children: Vec<Arc<dyn ExecutionPlan>>,
) -> Result<Arc<dyn ExecutionPlan>, DataFusionError>
fn with_new_children( self: Arc<Self>, children: Vec<Arc<dyn ExecutionPlan>>, ) -> Result<Arc<dyn ExecutionPlan>, DataFusionError>
Returns a new ExecutionPlan where all existing children were replaced
by the children, in order
Source§fn execute(
&self,
partition: usize,
context: Arc<TaskContext>,
) -> Result<SendableRecordBatchStream, DataFusionError>
fn execute( &self, partition: usize, context: Arc<TaskContext>, ) -> Result<SendableRecordBatchStream, DataFusionError>
Source§fn metrics(&self) -> Option<MetricsSet>
fn metrics(&self) -> Option<MetricsSet>
Return a snapshot of the set of Metrics for this
ExecutionPlan. If no Metrics are available, return None. Read more
Source§fn static_name() -> &'static str
where
Self: Sized,
fn static_name() -> &'static str
where
Self: Sized,
Short name for the ExecutionPlan, such as 'DataSourceExec'. Like name but can be called without an instance.
Source§fn downcast_delegate(&self) -> Option<&(dyn ExecutionPlan + 'static)>
fn downcast_delegate(&self) -> Option<&(dyn ExecutionPlan + 'static)>
ExecutionPlan downcast identity. Read more
Source§fn check_invariants(&self, check: InvariantLevel) -> Result<(), DataFusionError>
fn check_invariants(&self, check: InvariantLevel) -> Result<(), DataFusionError>
Source§fn required_input_distribution(&self) -> Vec<Distribution>
fn required_input_distribution(&self) -> Vec<Distribution>
Specifies the data distribution requirements for all the children for this ExecutionPlan. By default it’s [Distribution::UnspecifiedDistribution] for each child.
Source§fn required_input_ordering(&self) -> Vec<Option<OrderingRequirements>>
fn required_input_ordering(&self) -> Vec<Option<OrderingRequirements>>
Specifies the ordering required for all of the children of this ExecutionPlan. Read more
Source§fn maintains_input_order(&self) -> Vec<bool>
fn maintains_input_order(&self) -> Vec<bool>
Returns false if this ExecutionPlan’s implementation may reorder
rows within or between partitions. Read more
Source§fn benefits_from_input_partitioning(&self) -> Vec<bool>
fn benefits_from_input_partitioning(&self) -> Vec<bool>
Specifies whether the ExecutionPlan benefits from increased
parallelization at its input for each child. Read more
Source§fn reset_state(
self: Arc<Self>,
) -> Result<Arc<dyn ExecutionPlan>, DataFusionError>
fn reset_state( self: Arc<Self>, ) -> Result<Arc<dyn ExecutionPlan>, DataFusionError>
Reset any internal state within this ExecutionPlan. Read more
Source§fn repartitioned(
&self,
_target_partitions: usize,
_config: &ConfigOptions,
) -> Result<Option<Arc<dyn ExecutionPlan>>, DataFusionError>
fn repartitioned( &self, _target_partitions: usize, _config: &ConfigOptions, ) -> Result<Option<Arc<dyn ExecutionPlan>>, DataFusionError>
If supported, attempt to increase the partitioning of this ExecutionPlan to
produce target_partitions partitions. Read more
Source§fn partition_statistics(
&self,
partition: Option<usize>,
) -> Result<Arc<Statistics>, DataFusionError>
fn partition_statistics( &self, partition: Option<usize>, ) -> Result<Arc<Statistics>, DataFusionError>
Returns statistics for a specific partition of this ExecutionPlan node.
If statistics are not available, should return Statistics::new_unknown
(the default), not an error.
If partition is None, it returns statistics for the entire plan.
Source§fn supports_limit_pushdown(&self) -> bool
fn supports_limit_pushdown(&self) -> bool
Source§fn with_fetch(&self, _limit: Option<usize>) -> Option<Arc<dyn ExecutionPlan>>
fn with_fetch(&self, _limit: Option<usize>) -> Option<Arc<dyn ExecutionPlan>>
Returns a fetching variant of this ExecutionPlan node, if it supports
fetch limits. Returns None otherwise. Read more
Source§fn fetch(&self) -> Option<usize>
fn fetch(&self) -> Option<usize>
Gets the fetch count for the operator, None means there is no fetch.
Source§fn cardinality_effect(&self) -> CardinalityEffect
fn cardinality_effect(&self) -> CardinalityEffect
Source§fn try_swapping_with_projection(
&self,
_projection: &ProjectionExec,
) -> Result<Option<Arc<dyn ExecutionPlan>>, DataFusionError>
fn try_swapping_with_projection( &self, _projection: &ProjectionExec, ) -> Result<Option<Arc<dyn ExecutionPlan>>, DataFusionError>
Attempts to push down the given projection into the input of this ExecutionPlan. Read more
Source§fn gather_filters_for_pushdown(
&self,
_phase: FilterPushdownPhase,
parent_filters: Vec<Arc<dyn PhysicalExpr>>,
_config: &ConfigOptions,
) -> Result<FilterDescription, DataFusionError>
fn gather_filters_for_pushdown( &self, _phase: FilterPushdownPhase, parent_filters: Vec<Arc<dyn PhysicalExpr>>, _config: &ConfigOptions, ) -> Result<FilterDescription, DataFusionError>
ExecutionPlan::gather_filters_for_pushdown: Read more
Source§fn handle_child_pushdown_result(
&self,
_phase: FilterPushdownPhase,
child_pushdown_result: ChildPushdownResult,
_config: &ConfigOptions,
) -> Result<FilterPushdownPropagation<Arc<dyn ExecutionPlan>>, DataFusionError>
fn handle_child_pushdown_result( &self, _phase: FilterPushdownPhase, child_pushdown_result: ChildPushdownResult, _config: &ConfigOptions, ) -> Result<FilterPushdownPropagation<Arc<dyn ExecutionPlan>>, DataFusionError>
Source§fn with_new_state(
&self,
_state: Arc<dyn Any + Send + Sync>,
) -> Option<Arc<dyn ExecutionPlan>>
fn with_new_state( &self, _state: Arc<dyn Any + Send + Sync>, ) -> Option<Arc<dyn ExecutionPlan>>
Source§fn try_pushdown_sort(
&self,
_order: &[PhysicalSortExpr],
) -> Result<SortOrderPushdownResult<Arc<dyn ExecutionPlan>>, DataFusionError>
fn try_pushdown_sort( &self, _order: &[PhysicalSortExpr], ) -> Result<SortOrderPushdownResult<Arc<dyn ExecutionPlan>>, DataFusionError>
Source§fn with_preserve_order(
&self,
_preserve_order: bool,
) -> Option<Arc<dyn ExecutionPlan>>
fn with_preserve_order( &self, _preserve_order: bool, ) -> Option<Arc<dyn ExecutionPlan>>
Returns a variant of this ExecutionPlan that is aware of order-sensitivity. Read more
Source§impl NetworkBoundary for NetworkShuffleExec
impl NetworkBoundary for NetworkShuffleExec
Source§fn input_stage(&self) -> &Stage
fn input_stage(&self) -> &Stage
Source§fn with_input_stage(&self, input_stage: Stage) -> Result<Arc<dyn ExecutionPlan>>
fn with_input_stage(&self, input_stage: Stage) -> Result<Arc<dyn ExecutionPlan>>
Source§fn producer_head(&self, consumer_task_count: usize) -> ProducerHead
fn producer_head(&self, consumer_task_count: usize) -> ProducerHead
Auto Trait Implementations§
impl !RefUnwindSafe for NetworkShuffleExec
impl !UnwindSafe for NetworkShuffleExec
impl Freeze for NetworkShuffleExec
impl Send for NetworkShuffleExec
impl Sync for NetworkShuffleExec
impl Unpin for NetworkShuffleExec
impl UnsafeUnpin for NetworkShuffleExec
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T
where
T: Clone,
impl<T> CloneToUninit for T
where
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§impl<T> IntoRequest<T> for T
impl<T> IntoRequest<T> for T
Source§fn into_request(self) -> Request<T>
fn into_request(self) -> Request<T>
Wrap the input message T in a tonic::Request