pub trait DataSource:
'static
+ Send
+ Sync {
// Required methods
fn dtype(&self) -> &DType;
fn row_count_estimate(&self) -> Estimate<u64>;
fn scan<'life0, 'async_trait>(
&'life0 self,
scan_request: ScanRequest,
) -> Pin<Box<dyn Future<Output = VortexResult<DataSourceScanRef>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn serialize_split(&self, split: &dyn Split) -> VortexResult<Vec<u8>>;
fn deserialize_split(&self, data: &[u8]) -> VortexResult<SplitRef>;
}Expand description
A data source represents a streamable dataset that can be scanned with projection and filter expressions. Each scan produces splits that can be executed (potentially in parallel) to read data. Each split can be serialized for remote execution.
The DataSource may be used multiple times to create multiple scans, whereas each scan and each split of a scan can only be consumed once.
Required Methods
fn row_count_estimate(&self) -> Estimate<u64>
Returns an estimate of the row count of the source.
fn scan<'life0, 'async_trait>(
    &'life0 self,
    scan_request: ScanRequest,
) -> Pin<Box<dyn Future<Output = VortexResult<DataSourceScanRef>> + Send + 'async_trait>>
where
    Self: 'async_trait,
    'life0: 'async_trait,
Returns a scan over the source.
fn serialize_split(&self, split: &dyn Split) -> VortexResult<Vec<u8>>
Serialize a split from this data source.
fn deserialize_split(&self, data: &[u8]) -> VortexResult<SplitRef>
Deserialize a split that was previously serialized from a compatible data source.