pub struct SessionContext { /* private fields */ }
Expand description

SessionContext is the main interface for executing queries with DataFusion. It represents the connection between a user and a DataFusion/Ballista cluster. The context provides the following functionality:

  • Create DataFrame from a CSV or Parquet data source.
  • Register a CSV or Parquet data source as a table that can be referenced from a SQL query.
  • Register a custom data source that can be referenced from a SQL query.
  • Execute a SQL query.

The following example demonstrates how to use the context to execute a query against a CSV data source using the DataFrame API:

use datafusion::prelude::*;
let ctx = SessionContext::new();
let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
let df = df.filter(col("a").lt_eq(col("b")))?
           .aggregate(vec![col("a")], vec![min(col("b"))])?
           .limit(0, Some(100))?;
let results = df.collect();

The following example demonstrates how to execute the same query using SQL:

use datafusion::prelude::*;

let mut ctx = SessionContext::new();
ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;

Implementations§

source§

impl SessionContext

source

pub fn new() -> Self

Creates a new execution context using a default session configuration.

source

pub async fn refresh_catalogs(&self) -> Result<()>

Finds any ListSchemaProviders and instructs them to reload tables from “disk”

source

pub fn with_config(config: SessionConfig) -> Self

Creates a new session context using the provided session configuration.

source

pub fn with_config_rt(config: SessionConfig, runtime: Arc<RuntimeEnv>) -> Self

Creates a new session context using the provided configuration and RuntimeEnv.

source

pub fn with_state(state: SessionState) -> Self

Creates a new session context using the provided session state.

source

pub fn session_start_time(&self) -> DateTime<Utc>

Returns the time this session was created

source

pub fn register_batch( &self, table_name: &str, batch: RecordBatch ) -> Result<Option<Arc<dyn TableProvider>>>

Registers the RecordBatch as a table with the specified name.

source

pub fn runtime_env(&self) -> Arc<RuntimeEnv>

Return the RuntimeEnv used to run queries with this SessionContext

source

pub fn session_id(&self) -> String

Return the session_id of this Session

source

pub fn enable_ident_normalization(&self) -> bool

Return the enable_ident_normalization of this Session

source

pub fn copied_config(&self) -> SessionConfig

Return a copied version of config for this Session

source

pub async fn sql(&self, sql: &str) -> Result<DataFrame>

Creates a DataFrame that will execute a SQL query.

Note: This API implements DDL such as CREATE TABLE and CREATE VIEW with in-memory default implementations.

If this is not desirable, consider using SessionState::create_logical_plan() which does not mutate the state based on such statements.

source

pub fn register_variable( &self, variable_type: VarType, provider: Arc<dyn VarProvider + Send + Sync> )

Registers a variable provider within this context.

source

pub fn register_udf(&self, f: ScalarUDF)

Registers a scalar UDF within this context.

Note in SQL queries, function names are looked up using lowercase unless the query uses quotes. For example,

SELECT MY_FUNC(x)... will look for a function named "my_func"
SELECT "my_FUNC"(x) will look for a function named "my_FUNC"

source

pub fn register_udaf(&self, f: AggregateUDF)

Registers an aggregate UDF within this context.

Note in SQL queries, aggregate names are looked up using lowercase unless the query uses quotes. For example,

SELECT MY_UDAF(x)... will look for an aggregate named "my_udaf"
SELECT "my_UDAF"(x) will look for an aggregate named "my_UDAF"

source

pub async fn read_avro( &self, table_path: impl AsRef<str>, options: AvroReadOptions<'_> ) -> Result<DataFrame>

Creates a DataFrame for reading an Avro data source.

For more control such as reading multiple files, you can use read_table with a ListingTable.

source

pub async fn read_json( &self, table_path: impl AsRef<str>, options: NdJsonReadOptions<'_> ) -> Result<DataFrame>

Creates a DataFrame for reading a JSON data source.

For more control such as reading multiple files, you can use read_table with a ListingTable.

source

pub fn read_empty(&self) -> Result<DataFrame>

Creates an empty DataFrame.

source

pub async fn read_csv( &self, table_path: impl AsRef<str>, options: CsvReadOptions<'_> ) -> Result<DataFrame>

Creates a DataFrame for reading a CSV data source.

For more control such as reading multiple files, you can use read_table with a ListingTable.

source

pub async fn read_parquet( &self, table_path: impl AsRef<str>, options: ParquetReadOptions<'_> ) -> Result<DataFrame>

Creates a DataFrame for reading a Parquet data source.

For more control such as reading multiple files, you can use read_table with a ListingTable.

source

pub fn read_table(&self, provider: Arc<dyn TableProvider>) -> Result<DataFrame>

Creates a DataFrame for a TableProvider such as a ListingTable or a custom user defined provider.

source

pub fn read_batch(&self, batch: RecordBatch) -> Result<DataFrame>

Creates a DataFrame for reading a RecordBatch.

source

pub async fn register_listing_table( &self, name: &str, table_path: impl AsRef<str>, options: ListingOptions, provided_schema: Option<SchemaRef>, sql_definition: Option<String> ) -> Result<()>

Registers a ListingTable that can assemble multiple files from locations in an ObjectStore instance into a single table.

This method is async because it might need to resolve the schema.

source

pub async fn register_csv( &self, name: &str, table_path: &str, options: CsvReadOptions<'_> ) -> Result<()>

Registers a CSV file as a table that can be referenced from SQL statements executed against this context.

source

pub async fn register_json( &self, name: &str, table_path: &str, options: NdJsonReadOptions<'_> ) -> Result<()>

Registers a JSON file as a table that can be referenced from SQL statements executed against this context.

source

pub async fn register_parquet( &self, name: &str, table_path: &str, options: ParquetReadOptions<'_> ) -> Result<()>

Registers a Parquet file as a table that can be referenced from SQL statements executed against this context.

source

pub async fn register_avro( &self, name: &str, table_path: &str, options: AvroReadOptions<'_> ) -> Result<()>

Registers an Avro file as a table that can be referenced from SQL statements executed against this context.

source

pub fn register_catalog( &self, name: impl Into<String>, catalog: Arc<dyn CatalogProvider> ) -> Option<Arc<dyn CatalogProvider>>

Registers a named catalog using a custom CatalogProvider so that it can be referenced from SQL statements executed against this context.

Returns the CatalogProvider previously registered for this name, if any

source

pub fn catalog_names(&self) -> Vec<String>

Retrieves the list of available catalog names.

source

pub fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>

Retrieves a CatalogProvider instance by name

source

pub fn register_table<'a>( &'a self, table_ref: impl Into<TableReference<'a>>, provider: Arc<dyn TableProvider> ) -> Result<Option<Arc<dyn TableProvider>>>

Registers a TableProvider as a table that can be referenced from SQL statements executed against this context.

Returns the TableProvider previously registered for this reference, if any

source

pub fn deregister_table<'a>( &'a self, table_ref: impl Into<TableReference<'a>> ) -> Result<Option<Arc<dyn TableProvider>>>

Deregisters the given table.

Returns the registered provider, if any

source

pub fn table_exist<'a>( &'a self, table_ref: impl Into<TableReference<'a>> ) -> Result<bool>

Return true if the specified table exists in the schema provider.

source

pub async fn table<'a>( &self, table_ref: impl Into<TableReference<'a>> ) -> Result<DataFrame>

Retrieves a DataFrame representing a table previously registered by calling the register_table function.

Returns an error if no table has been registered with the provided reference.

source

pub async fn table_provider<'a>( &self, table_ref: impl Into<TableReference<'a>> ) -> Result<Arc<dyn TableProvider>>

Return a TableProvider for the specified table.

source

pub fn tables(&self) -> Result<HashSet<String>>

👎Deprecated: Please use the catalog provider interface (SessionContext::catalog) to examine available catalogs, schemas, and tables

Returns the set of available tables in the default catalog and schema.

Use table to get a specific table.

source

pub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan>

👎Deprecated: Use SessionState::optimize to ensure a consistent state for planning and execution

Optimizes the logical plan by applying optimizer rules.

source

pub async fn create_physical_plan( &self, logical_plan: &LogicalPlan ) -> Result<Arc<dyn ExecutionPlan>>

👎Deprecated: Use SessionState::create_physical_plan or DataFrame::create_physical_plan to ensure a consistent state for planning and execution

Creates a physical plan from a logical plan.

source

pub async fn write_csv( &self, plan: Arc<dyn ExecutionPlan>, path: impl AsRef<str> ) -> Result<()>

Executes a query and writes the results to a partitioned CSV file.

source

pub async fn write_json( &self, plan: Arc<dyn ExecutionPlan>, path: impl AsRef<str> ) -> Result<()>

Executes a query and writes the results to a partitioned JSON file.

source

pub async fn write_parquet( &self, plan: Arc<dyn ExecutionPlan>, path: impl AsRef<str>, writer_properties: Option<WriterProperties> ) -> Result<()>

Executes a query and writes the results to a partitioned Parquet file.

source

pub fn task_ctx(&self) -> Arc<TaskContext>

Get a new TaskContext to run in this session

source

pub fn state(&self) -> SessionState

Snapshots the SessionState of this SessionContext, setting the query_execution_start_time to the current time.

source

pub fn state_weak_ref(&self) -> Weak<RwLock<SessionState>>

Get weak reference to SessionState

source

pub fn register_catalog_list(&mut self, catalog_list: Arc<dyn CatalogList>)

Trait Implementations§

source§

impl Clone for SessionContext

source§

fn clone(&self) -> SessionContext

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl Default for SessionContext

source§

fn default() -> Self

Returns the “default value” for a type. Read more
source§

impl From<&SessionContext> for TaskContext

Create a new task context instance from SessionContext

source§

fn from(session: &SessionContext) -> Self

Converts to this type from the input type.
source§

impl FunctionRegistry for SessionContext

source§

fn udfs(&self) -> HashSet<String>

Set of all available udfs.
source§

fn udf(&self, name: &str) -> Result<Arc<ScalarUDF>>

Returns a reference to the udf named name.
source§

fn udaf(&self, name: &str) -> Result<Arc<AggregateUDF>>

Returns a reference to the udaf named name.

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

const: unstable · source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

const: unstable · source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

const: unstable · source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T where U: From<T>,

const: unstable · source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> Same<T> for T

§

type Output = T

Should always be Self
source§

impl<T> ToOwned for T where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
const: unstable · source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
const: unstable · source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T where V: MultiLane<T>,

§

fn vzip(self) -> V

source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more