Struct datafusion::execution::context::SessionContext
source · [−]pub struct SessionContext {
pub session_start_time: DateTime<Utc>,
pub state: Arc<RwLock<SessionState>>,
pub table_factories: HashMap<String, Arc<dyn TableProviderFactory>>,
/* private fields */
}
Expand description
SessionContext is the main interface for executing queries with DataFusion. It represents the connection between the user and a DataFusion/Ballista cluster. The context provides the following functionality:
- Create DataFrame from a CSV or Parquet data source.
- Register a CSV or Parquet data source as a table that can be referenced from a SQL query.
- Register a custom data source that can be referenced from a SQL query.
- Execute a SQL query.
The following example demonstrates how to use the context to execute a query against a CSV data source using the DataFrame API:
use datafusion::prelude::*;
let ctx = SessionContext::new();
let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
let df = df.filter(col("a").lt_eq(col("b")))?
.aggregate(vec![col("a")], vec![min(col("b"))])?
.limit(0, Some(100))?;
let results = df.collect();
The following example demonstrates how to execute the same query using SQL:
use datafusion::prelude::*;
let mut ctx = SessionContext::new();
ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?;
let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;
Fields
session_start_time: DateTime<Utc>
Session start time
state: Arc<RwLock<SessionState>>
Shared session state for the session
table_factories: HashMap<String, Arc<dyn TableProviderFactory>>
Dynamic table providers
Implementations
sourceimpl SessionContext
impl SessionContext
sourcepub fn with_config(config: SessionConfig) -> Self
pub fn with_config(config: SessionConfig) -> Self
Creates a new session context using the provided session configuration.
sourcepub fn with_config_rt(config: SessionConfig, runtime: Arc<RuntimeEnv>) -> Self
pub fn with_config_rt(config: SessionConfig, runtime: Arc<RuntimeEnv>) -> Self
Creates a new session context using the provided configuration and RuntimeEnv.
sourcepub fn with_state(state: SessionState) -> Self
pub fn with_state(state: SessionState) -> Self
Creates a new session context using the provided session state.
sourcepub fn register_table_factory(
&mut self,
file_type: &str,
factory: Arc<dyn TableProviderFactory>
)
pub fn register_table_factory(
&mut self,
file_type: &str,
factory: Arc<dyn TableProviderFactory>
)
Register a TableProviderFactory for a given file_type identifier.
sourcepub fn runtime_env(&self) -> Arc<RuntimeEnv>
pub fn runtime_env(&self) -> Arc<RuntimeEnv>
Return the RuntimeEnv used to run queries with this SessionContext
sourcepub fn session_id(&self) -> String
pub fn session_id(&self) -> String
Return the session_id of this Session
sourcepub fn copied_config(&self) -> SessionConfig
pub fn copied_config(&self) -> SessionConfig
Return a copied version of config for this Session
sourcepub async fn sql(&self, sql: &str) -> Result<Arc<DataFrame>>
pub async fn sql(&self, sql: &str) -> Result<Arc<DataFrame>>
Creates a dataframe that will execute a SQL query.
This method is async because queries of type CREATE EXTERNAL TABLE might require the schema to be inferred.
sourcepub fn create_logical_plan(&self, sql: &str) -> Result<LogicalPlan>
pub fn create_logical_plan(&self, sql: &str) -> Result<LogicalPlan>
Creates a logical plan.
This function is intended for internal use and should not be called directly.
sourcepub fn register_variable(
&mut self,
variable_type: VarType,
provider: Arc<dyn VarProvider + Send + Sync>
)
pub fn register_variable(
&mut self,
variable_type: VarType,
provider: Arc<dyn VarProvider + Send + Sync>
)
Registers a variable provider within this context.
sourcepub fn register_udf(&mut self, f: ScalarUDF)
pub fn register_udf(&mut self, f: ScalarUDF)
Registers a scalar UDF within this context.
Note in SQL queries, function names are looked up using lowercase unless the query uses quotes. For example,
SELECT MY_FUNC(x)...
will look for a function named "my_func"
SELECT "my_FUNC"(x)
will look for a function named "my_FUNC"
sourcepub fn register_udaf(&mut self, f: AggregateUDF)
pub fn register_udaf(&mut self, f: AggregateUDF)
Registers an aggregate UDF within this context.
Note in SQL queries, aggregate names are looked up using lowercase unless the query uses quotes. For example,
SELECT MY_UDAF(x)...
will look for an aggregate named "my_udaf"
SELECT "my_UDAF"(x)
will look for an aggregate named "my_UDAF"
sourcepub async fn read_avro(
&self,
table_path: impl AsRef<str>,
options: AvroReadOptions<'_>
) -> Result<Arc<DataFrame>>
pub async fn read_avro(
&self,
table_path: impl AsRef<str>,
options: AvroReadOptions<'_>
) -> Result<Arc<DataFrame>>
Creates a DataFrame for reading an Avro data source.
sourcepub async fn read_json(
&mut self,
table_path: impl AsRef<str>,
options: NdJsonReadOptions<'_>
) -> Result<Arc<DataFrame>>
pub async fn read_json(
&mut self,
table_path: impl AsRef<str>,
options: NdJsonReadOptions<'_>
) -> Result<Arc<DataFrame>>
Creates a DataFrame for reading a JSON data source.
sourcepub fn read_empty(&self) -> Result<Arc<DataFrame>>
pub fn read_empty(&self) -> Result<Arc<DataFrame>>
Creates an empty DataFrame.
sourcepub async fn read_csv(
&self,
table_path: impl AsRef<str>,
options: CsvReadOptions<'_>
) -> Result<Arc<DataFrame>>
pub async fn read_csv(
&self,
table_path: impl AsRef<str>,
options: CsvReadOptions<'_>
) -> Result<Arc<DataFrame>>
Creates a DataFrame for reading a CSV data source.
sourcepub async fn read_parquet(
&self,
table_path: impl AsRef<str>,
options: ParquetReadOptions<'_>
) -> Result<Arc<DataFrame>>
pub async fn read_parquet(
&self,
table_path: impl AsRef<str>,
options: ParquetReadOptions<'_>
) -> Result<Arc<DataFrame>>
Creates a DataFrame for reading a Parquet data source.
sourcepub fn read_table(
&self,
provider: Arc<dyn TableProvider>
) -> Result<Arc<DataFrame>>
pub fn read_table(
&self,
provider: Arc<dyn TableProvider>
) -> Result<Arc<DataFrame>>
Creates a DataFrame for reading a custom TableProvider.
sourcepub async fn register_listing_table(
&self,
name: &str,
table_path: impl AsRef<str>,
options: ListingOptions,
provided_schema: Option<SchemaRef>,
sql: Option<String>
) -> Result<()>
pub async fn register_listing_table(
&self,
name: &str,
table_path: impl AsRef<str>,
options: ListingOptions,
provided_schema: Option<SchemaRef>,
sql: Option<String>
) -> Result<()>
Registers a table that uses the listing feature of the object store to find the files to be processed. This is async because it might need to resolve the schema.
sourcepub async fn register_csv(
&self,
name: &str,
table_path: &str,
options: CsvReadOptions<'_>
) -> Result<()>
pub async fn register_csv(
&self,
name: &str,
table_path: &str,
options: CsvReadOptions<'_>
) -> Result<()>
Registers a CSV data source so that it can be referenced from SQL statements executed against this context.
sourcepub async fn register_json(
&self,
name: &str,
table_path: &str,
options: NdJsonReadOptions<'_>
) -> Result<()>
pub async fn register_json(
&self,
name: &str,
table_path: &str,
options: NdJsonReadOptions<'_>
) -> Result<()>
Registers a JSON data source so that it can be referenced from SQL statements executed against this context.
sourcepub async fn register_parquet(
&self,
name: &str,
table_path: &str,
options: ParquetReadOptions<'_>
) -> Result<()>
pub async fn register_parquet(
&self,
name: &str,
table_path: &str,
options: ParquetReadOptions<'_>
) -> Result<()>
Registers a Parquet data source so that it can be referenced from SQL statements executed against this context.
sourcepub async fn register_avro(
&self,
name: &str,
table_path: &str,
options: AvroReadOptions<'_>
) -> Result<()>
pub async fn register_avro(
&self,
name: &str,
table_path: &str,
options: AvroReadOptions<'_>
) -> Result<()>
Registers an Avro data source so that it can be referenced from SQL statements executed against this context.
sourcepub fn register_catalog(
&self,
name: impl Into<String>,
catalog: Arc<dyn CatalogProvider>
) -> Option<Arc<dyn CatalogProvider>>
pub fn register_catalog(
&self,
name: impl Into<String>,
catalog: Arc<dyn CatalogProvider>
) -> Option<Arc<dyn CatalogProvider>>
Registers a named catalog using a custom CatalogProvider so that it can be referenced from SQL statements executed against this context.
Returns the CatalogProvider previously registered for this name, if any.
sourcepub fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>
pub fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>
Retrieves a CatalogProvider instance by name.
sourcepub fn register_table<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>,
provider: Arc<dyn TableProvider>
) -> Result<Option<Arc<dyn TableProvider>>>
pub fn register_table<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>,
provider: Arc<dyn TableProvider>
) -> Result<Option<Arc<dyn TableProvider>>>
Registers a table using a custom TableProvider so that it can be referenced from SQL statements executed against this context.
Returns the TableProvider previously registered for this reference, if any.
sourcepub fn deregister_table<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Option<Arc<dyn TableProvider>>>
pub fn deregister_table<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Option<Arc<dyn TableProvider>>>
Deregisters the given table.
Returns the registered provider, if any
sourcepub fn table_exist<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>
) -> Result<bool>
pub fn table_exist<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>
) -> Result<bool>
Checks whether the given table exists in the schema provider. Returns true if the table exists.
sourcepub fn table<'a>(
&self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Arc<DataFrame>>
pub fn table<'a>(
&self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Arc<DataFrame>>
Retrieves a DataFrame representing a table previously registered by calling the register_table function.
Returns an error if no table has been registered with the provided reference.
sourcepub fn tables(&self) -> Result<HashSet<String>>
👎Deprecated: Please use the catalog provider interface (SessionContext::catalog) to examine available catalogs, schemas, and tables
pub fn tables(&self) -> Result<HashSet<String>>
Please use the catalog provider interface (SessionContext::catalog) to examine available catalogs, schemas, and tables.
Returns the set of available tables in the default catalog and schema.
Use table to get a specific table.
sourcepub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan>
pub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan>
Optimizes the logical plan by applying optimizer rules.
sourcepub async fn create_physical_plan(
&self,
logical_plan: &LogicalPlan
) -> Result<Arc<dyn ExecutionPlan>>
pub async fn create_physical_plan(
&self,
logical_plan: &LogicalPlan
) -> Result<Arc<dyn ExecutionPlan>>
Creates a physical plan from a logical plan.
sourcepub async fn write_csv(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>
) -> Result<()>
pub async fn write_csv(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>
) -> Result<()>
Executes a query and writes the results to a partitioned CSV file.
sourcepub async fn write_json(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>
) -> Result<()>
pub async fn write_json(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>
) -> Result<()>
Executes a query and writes the results to a partitioned JSON file.
sourcepub async fn write_parquet(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>,
writer_properties: Option<WriterProperties>
) -> Result<()>
pub async fn write_parquet(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>,
writer_properties: Option<WriterProperties>
) -> Result<()>
Executes a query and writes the results to a partitioned Parquet file.
sourcepub fn task_ctx(&self) -> Arc<TaskContext>
pub fn task_ctx(&self) -> Arc<TaskContext>
Get a new TaskContext to run in this session
sourcepub fn state(&self) -> SessionState
pub fn state(&self) -> SessionState
Get a copy of the SessionState of this SessionContext.
Trait Implementations
sourceimpl Clone for SessionContext
impl Clone for SessionContext
sourcefn clone(&self) -> SessionContext
fn clone(&self) -> SessionContext
Returns a copy of the value. Read more
1.0.0 · sourcefn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
sourceimpl Default for SessionContext
impl Default for SessionContext
sourceimpl From<&SessionContext> for TaskContext
impl From<&SessionContext> for TaskContext
Create a new task context instance from SessionContext
sourcefn from(session: &SessionContext) -> Self
fn from(session: &SessionContext) -> Self
Converts to this type from the input type.
sourceimpl FunctionRegistry for SessionContext
impl FunctionRegistry for SessionContext
Auto Trait Implementations
impl !RefUnwindSafe for SessionContext
impl Send for SessionContext
impl Sync for SessionContext
impl Unpin for SessionContext
impl !UnwindSafe for SessionContext
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcefn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more