datafusion_distributed/work_unit_feed/work_unit.rs
1use prost::Message;
2use std::any::Any;
3
4/// A [`WorkUnit`] is a single unit of runtime metadata produced by a
5/// [`crate::WorkUnitFeedProvider`] and consumed by a leaf [`datafusion::physical_plan::ExecutionPlan`]
6/// via an embedded [`crate::WorkUnitFeed`].
7///
8/// It can be anything:
9/// - A Parquet file address in S3 that must be read.
10/// - An HTTP query that should be issued to an external API.
11/// - An external database query that should be executed externally.
12/// - Etc...
13///
14/// Any protobuf message that implements `prost`'s [`Message`] trait is automatically a
15/// valid `WorkUnit` — no explicit `impl` block is required. Work units are serialized
16/// with `prost` when streamed from the coordinator to workers, and decoded back to the
17/// concrete `Self::WorkUnit` type of the receiving [`crate::WorkUnitFeedProvider`].
18///
19/// # Example
20///
21/// ```rust
22/// # use datafusion_distributed::WorkUnit;
23///
24/// #[derive(Clone, PartialEq, ::prost::Message)]
25/// struct FileAddressWorkUnit {
26/// #[prost(string, tag = "1")]
27/// url: String,
28/// }
29///
30/// let file_address = FileAddressWorkUnit {
31/// url: "s3://my-bucket/file.format".into()
32/// };
33///
34/// let work_unit = Box::new(file_address) as Box<dyn WorkUnit>;
35/// ```
36pub trait WorkUnit: Message {
37 fn into_any(self: Box<Self>) -> Box<dyn Any>;
38 fn encode_to_bytes(&self) -> Vec<u8>;
39}
40
41impl<T: Message + Default + 'static> WorkUnit for T {
42 fn into_any(self: Box<Self>) -> Box<dyn Any> {
43 self
44 }
45 fn encode_to_bytes(&self) -> Vec<u8> {
46 self.encode_to_vec()
47 }
48}