elusion

Struct CustomDataFrame

Source
pub struct CustomDataFrame {
    pub df: DataFrame,
    pub table_alias: String,
    /* private fields */
}
Expand description

Helper function to extract partition values from a RecordBatch based on partition columns. Assumes that each RecordBatch has a single unique partition value.

Fields§

§df: DataFrame
§table_alias: String

Implementations§

Source§

impl CustomDataFrame

Source

pub async fn new<'a>( file_path: &'a str, columns: Vec<(&'a str, &'a str, bool)>, alias: &'a str, ) -> Self

NEW method for loading and schema definition

Source

pub async fn raw_sql( &self, sql: &str, alias: &str, dfs: &[&CustomDataFrame], ) -> ElusionResult<Self>

Execute a raw SQL query involving multiple CustomDataFrame instances and return a new CustomDataFrame with the results.

§Arguments
  • sql - The raw SQL query string to execute.
  • alias - The alias name for the resulting DataFrame.
  • dfs - A slice of references to other CustomDataFrame instances to be registered in the context.
§Returns
  • ElusionResult<Self> - A new CustomDataFrame containing the result of the SQL query.
Source

pub fn load_csv<'a>( file_path: &'a str, schema: Arc<Schema>, alias: &'a str, ) -> BoxFuture<'a, Result<AliasedDataFrame, DataFusionError>>

LOAD function for CSV file type

Source

pub fn load_json<'a>( file_path: &'a str, alias: &'a str, ) -> BoxFuture<'a, Result<AliasedDataFrame, DataFusionError>>

Loads a JSON file into a DataFusion DataFrame.

§Arguments
  • file_path - The path to the JSON file.
  • schema - The Arrow schema defining the DataFrame columns.
  • alias - The alias name for the table within DataFusion.
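§Returns
  • BoxFuture<'a, Result<AliasedDataFrame, DataFusionError>> - A future that resolves to the loaded AliasedDataFrame, or a DataFusionError on failure.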
Source

pub fn load<'a>( file_path: &'a str, schema: Arc<Schema>, alias: &'a str, ) -> BoxFuture<'a, Result<AliasedDataFrame, DataFusionError>>

Unified load() function

Source

pub fn aggregation(self, aggregations: Vec<AggregationBuilder>) -> Self

AGGREGATION helper

Source

pub fn from_subquery(self, sub_df: CustomDataFrame, alias: &str) -> Self

FROM SUBQUERY clause

Source

pub fn with_cte(self, name: &str, cte_df: CustomDataFrame) -> Self

WITH CTE clause

Source

pub fn union(self, other: CustomDataFrame, all: bool) -> Self

UNION clause

Source

pub fn intersect(self, other: CustomDataFrame, all: bool) -> Self

INTERSECT clause

Source

pub fn except(self, other: CustomDataFrame, all: bool) -> Self

EXCEPT clause

Source

pub fn select(self, columns: Vec<&str>) -> Self

SELECT clause

Source

pub fn group_by(self, group_columns: Vec<&str>) -> Self

GROUP BY clause

Source

pub fn order_by(self, columns: Vec<&str>, ascending: Vec<bool>) -> Self

ORDER BY clause

Source

pub fn limit(self, count: usize) -> Self

LIMIT clause

Source

pub fn filter(self, condition: &str) -> Self

FILTER clause. Applies a WHERE filter with automatic lowercasing.

Source

pub fn having(self, condition: &str) -> Self

Applies a HAVING filter with automatic lowercasing

Source

pub fn join( self, other: CustomDataFrame, condition: &str, join_type: &str, ) -> Self

JOIN clause

Source

pub fn window( self, func: &str, column: &str, partition_by: Vec<&str>, order_by: Vec<&str>, alias: Option<&str>, ) -> Self

WINDOW clause

Source

pub fn add_column_with_cast( self, column: &str, new_alias: &str, data_type: &str, ) -> Self

CAST function

Source

pub fn add_column_with_trim(self, column: &str, new_alias: &str) -> Self

TRIM function

Source

pub fn add_column_with_regex( self, column: &str, pattern: &str, new_alias: &str, ) -> Self

REGEX function

Source

pub fn display_query_plan(&self)

DISPLAY Query Plan

Source

pub fn display_schema(&self)

Displays the current schema for debugging purposes.

Source

pub fn display_query(&self)

Displays the query generated from chained functions

Source

pub async fn display(&self) -> Result<(), DataFusionError>

Display function that prints the results to the terminal

Source

pub async fn write_to_parquet( &self, mode: &str, path: &str, options: Option<DataFrameWriteOptions>, ) -> ElusionResult<()>

Write the DataFrame to a Parquet file.

This function wraps DataFusion’s write_parquet method for easier usage.

§Parameters
  • mode: Specifies the write mode. Accepted values are:
    • "overwrite": Deletes existing files at the target path before writing.
    • "append": Appends to the existing Parquet file if it exists.
  • path: The file path where the Parquet file will be saved.
  • options: Optional write options for customizing the output.
§Example
// Write to Parquet in overwrite mode
custom_df.write_to_parquet("overwrite", "output.parquet", None).await?;

// Write to Parquet in append mode
custom_df.write_to_parquet("append", "output.parquet", None).await?;
§Errors

Returns an ElusionError if the DataFrame execution or writing fails.

Source

pub async fn write_to_csv( &self, mode: &str, path: &str, csv_options: CsvWriteOptions, ) -> ElusionResult<()>

Writes the DataFrame to a CSV file in either “overwrite” or “append” mode.

§Arguments
  • mode - The write mode, either “overwrite” or “append”.
  • path - The file path where the CSV will be written.
  • csv_options - CsvWriteOptions for customizing the write behavior.
§Returns
  • ElusionResult<()> - Ok(()) on success, or an ElusionError on failure.

Trait Implementations§

Source§

impl Clone for CustomDataFrame

Source§

fn clone(&self) -> CustomDataFrame

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dst: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> ErasedDestructor for T
where T: 'static,

Source§

impl<T> MaybeSendSync for T