Struct ExecutionGraph

Source

pub struct ExecutionGraph { /* private fields */ }

Expand description

Represents the DAG for a distributed query plan.

A distributed query plan consists of a set of stages which must be executed sequentially.

Each stage consists of a set of partitions which can be executed in parallel, where each partition represents a Task, which is the basic unit of scheduling in kapot.

As an example, consider a SQL query which performs a simple aggregation:

SELECT id, SUM(gmv) FROM some_table GROUP BY id

This will produce a DataFusion execution plan that looks something like

CoalesceBatchesExec: target_batch_size=4096
  RepartitionExec: partitioning=Hash([Column { name: "id", index: 0 }], 4)
    AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[SUM(some_table.gmv)]
      TableScan: some_table

The kapot DistributedPlanner will turn this into a distributed plan by creating a shuffle boundary (called a “Stage”) whenever the underlying plan needs to perform a repartition. In this case we end up with a distributed plan with two stages:

ExecutionGraph[job_id=job, session_id=session, available_tasks=1, complete=false]
=========UnResolvedStage[id=2, children=1]=========
Inputs{1: StageOutput { partition_locations: {}, complete: false }}
ShuffleWriterExec: None
  AggregateExec: mode=FinalPartitioned, gby=[id@0 as id], aggr=[SUM(?table?.gmv)]
    CoalesceBatchesExec: target_batch_size=4096
      UnresolvedShuffleExec
=========ResolvedStage[id=1, partitions=1]=========
ShuffleWriterExec: Some(Hash([Column { name: "id", index: 0 }], 4))
  AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[SUM(?table?.gmv)]
    TableScan: some_table

The DAG structure of this ExecutionGraph is encoded in the stages. Each stage’s input field will indicate which stages it depends on, and each stage’s output_links will indicate which stage it needs to publish its output to.

If a stage’s output_links is empty, then it is the final stage in this query, and it should publish its outputs to the ExecutionGraph’s output_locations, which represent the final query results.

ExecutionGraph

Struct ExecutionGraph Copy item path

Implementations§

impl ExecutionGraph

pub fn new( scheduler_id: &str, job_id: &str, job_name: &str, session_id: &str, plan: Arc<dyn ExecutionPlan>, queued_at: u64, ) -> Result<Self>

pub fn job_id(&self) -> &str

pub fn job_name(&self) -> &str

pub fn session_id(&self) -> &str

pub fn status(&self) -> &JobStatus

pub fn start_time(&self) -> u64

pub fn end_time(&self) -> u64

pub fn stage_count(&self) -> usize

pub fn next_task_id(&mut self) -> usize

pub fn is_successful(&self) -> bool

pub fn is_complete(&self) -> bool

pub fn revive(&mut self) -> bool

pub fn update_task_status( &mut self, executor: &ExecutorMetadata, task_statuses: Vec<TaskStatus>, max_task_failures: usize, max_stage_failures: usize, ) -> Result<Vec<QueryStageSchedulerEvent>>

pub fn running_stages(&self) -> Vec<usize>

pub fn running_tasks(&self) -> Vec<RunningTaskInfo>

pub fn available_tasks(&self) -> usize

pub fn pop_next_task( &mut self, executor_id: &str, ) -> Result<Option<TaskDescription>>

pub fn update_status(&mut self, status: JobStatus)

pub fn output_locations(&self) -> Vec<PartitionLocation>

pub fn reset_stages_on_lost_executor( &mut self, executor_id: &str, ) -> Result<(HashSet<usize>, Vec<RunningTaskInfo>)>

pub fn resolve_stage(&mut self, stage_id: usize) -> Result<bool>

pub fn succeed_stage(&mut self, stage_id: usize) -> bool

pub fn fail_stage(&mut self, stage_id: usize, err_msg: String) -> bool

pub fn rollback_running_stage( &mut self, stage_id: usize, failure_reasons: HashSet<String>, ) -> Result<Vec<RunningTaskInfo>>

pub fn rollback_resolved_stage(&mut self, stage_id: usize) -> Result<bool>

pub fn rerun_successful_stage(&mut self, stage_id: usize) -> bool

pub fn fail_job(&mut self, error: String)

pub fn succeed_job(&mut self) -> Result<()>

Trait Implementations§

impl Clone for ExecutionGraph

fn clone(&self) -> ExecutionGraph

fn clone_from(&mut self, source: &Self)

impl Debug for ExecutionGraph

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl From<&ExecutionGraph> for JobOverview

fn from(value: &ExecutionGraph) -> Self

Auto Trait Implementations§

impl Freeze for ExecutionGraph

impl !RefUnwindSafe for ExecutionGraph

impl Send for ExecutionGraph

impl Sync for ExecutionGraph

impl Unpin for ExecutionGraph

impl !UnwindSafe for ExecutionGraph

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> FromRef<T> for T where T: Clone,

fn from_ref(input: &T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> IntoRequest<T> for T

fn into_request(self) -> Request<T>

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

Struct ExecutionGraph

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T> FromRef<T> for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,