pub trait DataSource:
'static
+ Send
+ Sync {
// Required methods
fn dtype(&self) -> &DType;
fn scan<'life0, 'async_trait>(
&'life0 self,
scan_request: ScanRequest,
) -> Pin<Box<dyn Future<Output = VortexResult<DataSourceScanRef>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
fn field_statistics<'life0, 'life1, 'async_trait>(
&'life0 self,
field_path: &'life1 FieldPath,
) -> Pin<Box<dyn Future<Output = VortexResult<StatsSet>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait;
// Provided methods
fn row_count(&self) -> Option<Precision<u64>> { ... }
fn byte_size(&self) -> Option<Precision<u64>> { ... }
fn serialize(&self) -> VortexResult<Option<Vec<u8>>> { ... }
fn deserialize_partition(
&self,
data: &[u8],
session: &VortexSession,
) -> VortexResult<PartitionRef> { ... }
}
A data source represents a streamable dataset that can be scanned with projection and filter expressions. Each scan produces partitions that can be executed in parallel to read data. Each partition can be serialized for remote execution.
The DataSource may be used multiple times to create multiple scans, whereas each scan and each partition of a scan can only be consumed once.
Required Methods§
fn scan<'life0, 'async_trait>(
&'life0 self,
scan_request: ScanRequest,
) -> Pin<Box<dyn Future<Output = VortexResult<DataSourceScanRef>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
Returns a scan over the source.
fn field_statistics<'life0, 'life1, 'async_trait>(
&'life0 self,
field_path: &'life1 FieldPath,
) -> Pin<Box<dyn Future<Output = VortexResult<StatsSet>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Returns the statistics for a given field.
Provided Methods§
fn row_count(&self) -> Option<Precision<u64>>
Returns an estimate of the row count of the un-filtered source.
fn byte_size(&self) -> Option<Precision<u64>>
Returns an estimate of the byte size of the un-filtered source.
fn serialize(&self) -> VortexResult<Option<Vec<u8>>>
Serialize the DataSource to pass to a remote worker.
fn deserialize_partition(
&self,
data: &[u8],
session: &VortexSession,
) -> VortexResult<PartitionRef>
Deserialize a partition that was previously serialized from a compatible data source.