pub struct HudiDataSource { /* private fields */ }
Expand description
Create a HudiDataSource
.
Used for Datafusion to query Hudi tables
§Examples
use std::sync::Arc;
use datafusion::error::Result;
use datafusion::prelude::{DataFrame, SessionContext};
use hudi_datafusion::HudiDataSource;
// Initialize a new DataFusion session context
let ctx = SessionContext::new();
// Create a new HudiDataSource with specific read options
let hudi = HudiDataSource::new_with_options(
"/tmp/trips_table",
[("hoodie.read.input.partitions", 5)]).await?;
// Register the Hudi table with the session context
ctx.register_table("trips_table", Arc::new(hudi))?;
let df: DataFrame = ctx.sql("SELECT * from trips_table where city = 'san_francisco'").await?;
df.show().await?;
Implementations§
Trait Implementations§
Source§impl Clone for HudiDataSource
impl Clone for HudiDataSource
Source§fn clone(&self) -> HudiDataSource
fn clone(&self) -> HudiDataSource
Returns a duplicate of the value. Read more
1.0.0 · Source§const fn clone_from(&mut self, source: &Self)
const fn clone_from(&mut self, source: &Self)
Performs copy-assignment from
source
. Read moreSource§impl Debug for HudiDataSource
impl Debug for HudiDataSource
Source§impl TableProvider for HudiDataSource
impl TableProvider for HudiDataSource
Source§fn as_any(&self) -> &dyn Any
fn as_any(&self) -> &dyn Any
Returns the table provider as
Any
so that it can be
downcast to a specific implementation.Source§fn table_type(&self) -> TableType
fn table_type(&self) -> TableType
Get the type of this table for metadata/catalog purposes.
Source§fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
state: &'life1 dyn Session,
projection: Option<&'life2 Vec<usize>>,
filters: &'life3 [Expr],
limit: Option<usize>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
state: &'life1 dyn Session,
projection: Option<&'life2 Vec<usize>>,
filters: &'life3 [Expr],
limit: Option<usize>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
Create an
ExecutionPlan
for scanning the table with optionally
specified projection
, filter
and limit
, described below. Read moreSource§fn supports_filters_pushdown(
&self,
filters: &[&Expr],
) -> Result<Vec<TableProviderFilterPushDown>>
fn supports_filters_pushdown( &self, filters: &[&Expr], ) -> Result<Vec<TableProviderFilterPushDown>>
Specify if DataFusion should provide filter expressions to the
TableProvider to apply during the scan. Read more
Source§fn constraints(&self) -> Option<&Constraints>
fn constraints(&self) -> Option<&Constraints>
Get a reference to the constraints of the table.
Returns: Read more
Source§fn get_table_definition(&self) -> Option<&str>
fn get_table_definition(&self) -> Option<&str>
Get the create statement used to create this table, if available.
Source§fn get_logical_plan(&self) -> Option<Cow<'_, LogicalPlan>>
fn get_logical_plan(&self) -> Option<Cow<'_, LogicalPlan>>
Get the
LogicalPlan
of this table, if available.Source§fn get_column_default(&self, _column: &str) -> Option<&Expr>
fn get_column_default(&self, _column: &str) -> Option<&Expr>
Get the default value for a column, if available.
Source§fn statistics(&self) -> Option<Statistics>
fn statistics(&self) -> Option<Statistics>
Get statistics for this table, if available
Although not presently used in mainline DataFusion, this allows implementation specific
behavior for downstream repositories, in conjunction with specialized optimizer rules to
perform operations such as re-ordering of joins.
Source§fn insert_into<'life0, 'life1, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
_input: Arc<dyn ExecutionPlan>,
_insert_op: InsertOp,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>, DataFusionError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn insert_into<'life0, 'life1, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
_input: Arc<dyn ExecutionPlan>,
_insert_op: InsertOp,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>, DataFusionError>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Return an
ExecutionPlan
to insert data into this table, if
supported. Read moreAuto Trait Implementations§
impl Freeze for HudiDataSource
impl !RefUnwindSafe for HudiDataSource
impl Send for HudiDataSource
impl Sync for HudiDataSource
impl Unpin for HudiDataSource
impl !UnwindSafe for HudiDataSource
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts
self
into a Left
variant of Either<Self, Self>
if into_left
is true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read moreSource§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts
self
into a Left
variant of Either<Self, Self>
if into_left(&self)
returns true
.
Converts self
into a Right
variant of Either<Self, Self>
otherwise. Read more