//! Abstractions for scanning data: the [`DataSource`] trait, the [`ScanRequest`] passed to
//! [`DataSource::scan`], the [`DataSourceScan`] it returns, and the [`Partition`]s that a scan
//! yields as a stream.
#![deny(missing_docs)]
/// Row mask support.
// NOTE(review): the contents of this module are not visible from this file — expand this summary
// once confirmed.
pub mod row_mask;
/// Home of [`selection::Selection`], the type of [`ScanRequest::selection`].
pub mod selection;
use std::any::Any;
use std::ops::Range;
use std::sync::Arc;
use async_trait::async_trait;
use futures::stream::BoxStream;
use selection::Selection;
use vortex_array::dtype::DType;
use vortex_array::dtype::FieldPath;
use vortex_array::expr::Expression;
use vortex_array::expr::root;
use vortex_array::expr::stats::Precision;
use vortex_array::stats::StatsSet;
use vortex_array::stream::SendableArrayStream;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_session::VortexSession;
/// A boxed, `'static` stream whose items are `VortexResult<PartitionRef>`.
///
/// This is the type returned by [`DataSourceScan::partitions`].
pub type PartitionStream = BoxStream<'static, VortexResult<PartitionRef>>;
/// Opens a [`DataSource`] from a URI string.
// NOTE(review): which URI schemes/formats are accepted is up to each implementation and is not
// specified here.
#[async_trait]
pub trait DataSourceOpener: 'static {
/// Asynchronously opens the data source identified by `uri`.
///
/// `session` is the [`VortexSession`] made available to the implementation while opening.
///
/// # Errors
///
/// Returns an error if the implementation cannot produce a data source for `uri`.
async fn open(&self, uri: String, session: &VortexSession) -> VortexResult<DataSourceRef>;
}
/// Reconstructs a [`DataSource`] from a byte buffer.
// NOTE(review): presumably the counterpart of `DataSource::serialize`, used to ship a data source
// to another process — confirm with callers.
// NOTE(review): this trait declares no `async fn`, so `#[async_trait]` looks redundant here.
#[async_trait]
pub trait DataSourceRemote: 'static {
/// Builds a data source from `data`.
///
/// `session` is the [`VortexSession`] made available to the implementation while decoding.
///
/// # Errors
///
/// Returns an error if the implementation cannot decode `data` into a data source.
fn deserialize_data_source(
&self,
data: &[u8],
session: &VortexSession,
) -> VortexResult<DataSourceRef>;
}
/// A shared, reference-counted handle to a type-erased [`DataSource`].
pub type DataSourceRef = Arc<dyn DataSource>;
/// A source of data that can be scanned via [`DataSource::scan`].
///
/// Implementations must be `'static + Send + Sync`, which allows them to be shared behind a
/// [`DataSourceRef`].
#[async_trait]
pub trait DataSource: 'static + Send + Sync {
/// Returns the [`DType`] of this data source.
fn dtype(&self) -> &DType;
/// Returns the number of rows in this data source, if known.
///
/// The default implementation returns `None`.
// NOTE(review): `Precision` presumably distinguishes exact from estimated values — confirm.
fn row_count(&self) -> Option<Precision<u64>> {
None
}
/// Returns the size of this data source in bytes, if known.
///
/// The default implementation returns `None`.
fn byte_size(&self) -> Option<Precision<u64>> {
None
}
/// Serializes this data source to bytes.
///
/// The default implementation returns `Ok(None)`.
// NOTE(review): `Ok(None)` presumably signals "serialization not supported" — confirm.
fn serialize(&self) -> VortexResult<Option<Vec<u8>>> {
Ok(None)
}
/// Reconstructs a [`Partition`] of this data source from `data`.
///
/// # Errors
///
/// The default implementation ignores both arguments and always fails with
/// "DataSource does not support deserialization".
fn deserialize_partition(
&self,
data: &[u8],
session: &VortexSession,
) -> VortexResult<PartitionRef> {
// Explicitly discard the arguments so the default body does not trigger unused-variable
// warnings while keeping the parameter names meaningful for implementors.
let _ = (data, session);
vortex_bail!("DataSource does not support deserialization")
}
/// Starts a scan described by `scan_request` and returns a handle to it.
///
/// # Errors
///
/// Returns an error if the implementation cannot set up the scan.
async fn scan(&self, scan_request: ScanRequest) -> VortexResult<DataSourceScanRef>;
/// Returns the [`StatsSet`] for the field addressed by `field_path`.
///
/// # Errors
///
/// Returns an error if the implementation cannot provide statistics for `field_path`.
async fn field_statistics(&self, field_path: &FieldPath) -> VortexResult<StatsSet>;
}
/// The parameters of a scan, passed to [`DataSource::scan`].
///
/// The [`Default`] implementation uses `root()` as the projection, the default [`Selection`],
/// `ordered: false`, and leaves the filter, row range and limit unset.
#[derive(Debug, Clone)]
pub struct ScanRequest {
/// Projection expression for the scan. Defaults to `root()`.
pub projection: Expression,
/// Optional filter expression. Defaults to `None`.
pub filter: Option<Expression>,
/// Optional half-open range of rows (`start..end`). Defaults to `None`.
// NOTE(review): presumably restricts the scan to these row offsets — confirm how this
// interacts with `selection` and `filter`.
pub row_range: Option<Range<u64>>,
/// Row selection for the scan. Defaults to `Selection::default()`.
pub selection: Selection,
/// Ordering flag for the scan. Defaults to `false`.
// NOTE(review): presumably requests that results preserve the source's row order — confirm.
pub ordered: bool,
/// Optional limit for the scan. Defaults to `None`.
// NOTE(review): presumably the maximum number of rows to return — confirm.
pub limit: Option<u64>,
}
impl Default for ScanRequest {
    /// Builds a request that projects `root()`, uses the default [`Selection`], is unordered,
    /// and has no filter, row range, or limit.
    fn default() -> Self {
        // The only non-trivial defaults; every other field is simply "unset".
        let projection = root();
        let selection = Selection::default();

        Self {
            projection,
            filter: None,
            row_range: None,
            selection,
            ordered: false,
            limit: None,
        }
    }
}
/// An owned, type-erased [`DataSourceScan`], as returned by [`DataSource::scan`].
pub type DataSourceScanRef = Box<dyn DataSourceScan>;
/// A scan that has been set up by [`DataSource::scan`] and can be turned into a stream of
/// partitions.
pub trait DataSourceScan: 'static + Send {
/// Returns the [`DType`] of this scan.
// NOTE(review): presumably the dtype of the arrays produced after projection — confirm.
fn dtype(&self) -> &DType;
/// Returns the number of partitions this scan will yield, if known.
fn partition_count(&self) -> Option<Precision<usize>>;
/// Consumes the scan and returns its partitions as a [`PartitionStream`].
fn partitions(self: Box<Self>) -> PartitionStream;
}
/// An owned, type-erased [`Partition`].
pub type PartitionRef = Box<dyn Partition>;
/// One unit of a scan, yielded by [`DataSourceScan::partitions`], that can be executed to
/// produce a stream of arrays.
pub trait Partition: 'static + Send {
/// Returns `self` as `&dyn Any`, allowing callers to downcast to the concrete type.
fn as_any(&self) -> &dyn Any;
/// Returns the number of rows in this partition, if known.
fn row_count(&self) -> Option<Precision<u64>>;
/// Returns the size of this partition in bytes, if known.
fn byte_size(&self) -> Option<Precision<u64>>;
/// Serializes this partition to bytes.
///
/// The default implementation returns `Ok(None)`.
// NOTE(review): `Ok(None)` presumably signals "serialization not supported" — confirm.
fn serialize(&self) -> VortexResult<Option<Vec<u8>>> {
Ok(None)
}
/// Consumes the partition and returns its output as a [`SendableArrayStream`].
///
/// # Errors
///
/// Returns an error if the implementation cannot start execution.
fn execute(self: Box<Self>) -> VortexResult<SendableArrayStream>;
}