Struct datafusion::execution::context::ExecutionContext
source · [−]pub struct ExecutionContext {
pub state: Arc<Mutex<ExecutionContextState>>,
}
Expand description
ExecutionContext is the main interface for executing queries with DataFusion. The context provides the following functionality:
- Create DataFrame from a CSV or Parquet data source.
- Register a CSV or Parquet data source as a table that can be referenced from a SQL query.
- Register a custom data source that can be referenced from a SQL query.
- Execute a SQL query.
The following example demonstrates how to use the context to execute a query against a CSV data source using the DataFrame API:
use datafusion::prelude::*;
let mut ctx = ExecutionContext::new();
let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
let df = df.filter(col("a").lt_eq(col("b")))?
.aggregate(vec![col("a")], vec![min(col("b"))])?
.limit(100)?;
let results = df.collect();
The following example demonstrates how to execute the same query using SQL:
use datafusion::prelude::*;
let mut ctx = ExecutionContext::new();
ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?;
let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;
Fields
state: Arc<Mutex<ExecutionContextState>>
Internal state for the context
Implementations
sourceimpl ExecutionContext
impl ExecutionContext
sourcepub fn with_config(config: ExecutionConfig) -> Self
pub fn with_config(config: ExecutionConfig) -> Self
Creates a new execution context using the provided configuration.
sourcepub fn runtime_env(&self) -> Arc<RuntimeEnv>
pub fn runtime_env(&self) -> Arc<RuntimeEnv>
Return the RuntimeEnv used to run queries with this ExecutionContext
sourcepub async fn sql(&mut self, sql: &str) -> Result<Arc<dyn DataFrame>>
pub async fn sql(&mut self, sql: &str) -> Result<Arc<dyn DataFrame>>
Creates a dataframe that will execute a SQL query.
This method is async because queries of type CREATE EXTERNAL TABLE might require the schema to be inferred.
sourcepub fn create_logical_plan(&self, sql: &str) -> Result<LogicalPlan>
pub fn create_logical_plan(&self, sql: &str) -> Result<LogicalPlan>
Creates a logical plan.
This function is intended for internal use and should not be called directly.
sourcepub fn register_variable(
&mut self,
variable_type: VarType,
provider: Arc<dyn VarProvider + Send + Sync>
)
pub fn register_variable(
&mut self,
variable_type: VarType,
provider: Arc<dyn VarProvider + Send + Sync>
)
Registers a variable provider within this context.
sourcepub fn register_udf(&mut self, f: ScalarUDF)
pub fn register_udf(&mut self, f: ScalarUDF)
Registers a scalar UDF within this context.
Note in SQL queries, function names are looked up using lowercase unless the query uses quotes. For example,
SELECT MY_FUNC(x)...
will look for a function named "my_func"
SELECT "my_FUNC"(x)
will look for a function named "my_FUNC"
sourcepub fn register_udaf(&mut self, f: AggregateUDF)
pub fn register_udaf(&mut self, f: AggregateUDF)
Registers an aggregate UDF within this context.
Note in SQL queries, aggregate names are looked up using lowercase unless the query uses quotes. For example,
SELECT MY_UDAF(x)...
will look for an aggregate named "my_udaf"
SELECT "my_UDAF"(x)
will look for an aggregate named "my_UDAF"
sourcepub async fn read_avro(
&mut self,
uri: impl Into<String>,
options: AvroReadOptions<'_>
) -> Result<Arc<dyn DataFrame>>
pub async fn read_avro(
&mut self,
uri: impl Into<String>,
options: AvroReadOptions<'_>
) -> Result<Arc<dyn DataFrame>>
Creates a DataFrame for reading an Avro data source.
sourcepub fn read_empty(&self) -> Result<Arc<dyn DataFrame>>
pub fn read_empty(&self) -> Result<Arc<dyn DataFrame>>
Creates an empty DataFrame.
sourcepub async fn read_csv(
&mut self,
uri: impl Into<String>,
options: CsvReadOptions<'_>
) -> Result<Arc<dyn DataFrame>>
pub async fn read_csv(
&mut self,
uri: impl Into<String>,
options: CsvReadOptions<'_>
) -> Result<Arc<dyn DataFrame>>
Creates a DataFrame for reading a CSV data source.
sourcepub async fn read_parquet(
&mut self,
uri: impl Into<String>
) -> Result<Arc<dyn DataFrame>>
pub async fn read_parquet(
&mut self,
uri: impl Into<String>
) -> Result<Arc<dyn DataFrame>>
Creates a DataFrame for reading a Parquet data source.
sourcepub fn read_table(
&mut self,
provider: Arc<dyn TableProvider>
) -> Result<Arc<dyn DataFrame>>
pub fn read_table(
&mut self,
provider: Arc<dyn TableProvider>
) -> Result<Arc<dyn DataFrame>>
Creates a DataFrame for reading a custom TableProvider.
sourcepub async fn register_listing_table<'a>(
&'a mut self,
name: &'a str,
uri: &'a str,
options: ListingOptions,
provided_schema: Option<SchemaRef>
) -> Result<()>
pub async fn register_listing_table<'a>(
&'a mut self,
name: &'a str,
uri: &'a str,
options: ListingOptions,
provided_schema: Option<SchemaRef>
) -> Result<()>
Registers a table that uses the listing feature of the object store to find the files to be processed. This is async because it might need to resolve the schema.
sourcepub async fn register_csv(
&mut self,
name: &str,
uri: &str,
options: CsvReadOptions<'_>
) -> Result<()>
pub async fn register_csv(
&mut self,
name: &str,
uri: &str,
options: CsvReadOptions<'_>
) -> Result<()>
Registers a CSV data source so that it can be referenced from SQL statements executed against this context.
sourcepub async fn register_parquet(&mut self, name: &str, uri: &str) -> Result<()>
pub async fn register_parquet(&mut self, name: &str, uri: &str) -> Result<()>
Registers a Parquet data source so that it can be referenced from SQL statements executed against this context.
sourcepub async fn register_avro(
&mut self,
name: &str,
uri: &str,
options: AvroReadOptions<'_>
) -> Result<()>
pub async fn register_avro(
&mut self,
name: &str,
uri: &str,
options: AvroReadOptions<'_>
) -> Result<()>
Registers an Avro data source so that it can be referenced from SQL statements executed against this context.
sourcepub fn register_catalog(
&self,
name: impl Into<String>,
catalog: Arc<dyn CatalogProvider>
) -> Option<Arc<dyn CatalogProvider>>
pub fn register_catalog(
&self,
name: impl Into<String>,
catalog: Arc<dyn CatalogProvider>
) -> Option<Arc<dyn CatalogProvider>>
Registers a named catalog using a custom CatalogProvider so that it can be referenced from SQL statements executed against this context.
Returns the CatalogProvider previously registered for this name, if any.
sourcepub fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>
pub fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>
Retrieves a CatalogProvider instance by name.
sourcepub fn register_object_store(
&self,
scheme: impl Into<String>,
object_store: Arc<dyn ObjectStore>
) -> Option<Arc<dyn ObjectStore>>
pub fn register_object_store(
&self,
scheme: impl Into<String>,
object_store: Arc<dyn ObjectStore>
) -> Option<Arc<dyn ObjectStore>>
Registers an object store for the given scheme using a custom ObjectStore so that an external file system or object storage system can be used with this context.
Returns the ObjectStore previously registered for this scheme, if any.
sourcepub fn object_store<'a>(
&self,
uri: &'a str
) -> Result<(Arc<dyn ObjectStore>, &'a str)>
pub fn object_store<'a>(
&self,
uri: &'a str
) -> Result<(Arc<dyn ObjectStore>, &'a str)>
Retrieves an ObjectStore instance by scheme.
sourcepub fn register_table<'a>(
&'a mut self,
table_ref: impl Into<TableReference<'a>>,
provider: Arc<dyn TableProvider>
) -> Result<Option<Arc<dyn TableProvider>>>
pub fn register_table<'a>(
&'a mut self,
table_ref: impl Into<TableReference<'a>>,
provider: Arc<dyn TableProvider>
) -> Result<Option<Arc<dyn TableProvider>>>
Registers a table using a custom TableProvider so that it can be referenced from SQL statements executed against this context.
Returns the TableProvider previously registered for this reference, if any.
sourcepub fn deregister_table<'a>(
&'a mut self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Option<Arc<dyn TableProvider>>>
pub fn deregister_table<'a>(
&'a mut self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Option<Arc<dyn TableProvider>>>
Deregisters the given table.
Returns the registered provider, if any
sourcepub fn table<'a>(
&self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Arc<dyn DataFrame>>
pub fn table<'a>(
&self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Arc<dyn DataFrame>>
Retrieves a DataFrame representing a table previously registered by calling the register_table function.
Returns an error if no table has been registered with the provided reference.
sourcepub fn tables(&self) -> Result<HashSet<String>>
👎 Deprecated: Please use the catalog provider interface (ExecutionContext::catalog) to examine available catalogs, schemas, and tables.
pub fn tables(&self) -> Result<HashSet<String>>
Please use the catalog provider interface (ExecutionContext::catalog) to examine available catalogs, schemas, and tables.
Returns the set of available tables in the default catalog and schema.
Use table to get a specific table.
sourcepub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan>
pub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan>
Optimizes the logical plan by applying optimizer rules.
sourcepub async fn create_physical_plan(
&self,
logical_plan: &LogicalPlan
) -> Result<Arc<dyn ExecutionPlan>>
pub async fn create_physical_plan(
&self,
logical_plan: &LogicalPlan
) -> Result<Arc<dyn ExecutionPlan>>
Creates a physical plan from a logical plan.
sourcepub async fn write_csv(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>
) -> Result<()>
pub async fn write_csv(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>
) -> Result<()>
Executes a query and writes the results to a partitioned CSV file.
sourcepub async fn write_parquet(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>,
writer_properties: Option<WriterProperties>
) -> Result<()>
pub async fn write_parquet(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>,
writer_properties: Option<WriterProperties>
) -> Result<()>
Executes a query and writes the results to a partitioned Parquet file.
Trait Implementations
sourceimpl Clone for ExecutionContext
impl Clone for ExecutionContext
sourcefn clone(&self) -> ExecutionContext
fn clone(&self) -> ExecutionContext
Returns a copy of the value. Read more
1.0.0 · sourcefn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
sourceimpl Default for ExecutionContext
impl Default for ExecutionContext
sourceimpl From<Arc<Mutex<RawMutex, ExecutionContextState>>> for ExecutionContext
impl From<Arc<Mutex<RawMutex, ExecutionContextState>>> for ExecutionContext
sourcefn from(state: Arc<Mutex<ExecutionContextState>>) -> Self
fn from(state: Arc<Mutex<ExecutionContextState>>) -> Self
Performs the conversion.
sourceimpl FunctionRegistry for ExecutionContext
impl FunctionRegistry for ExecutionContext
Auto Trait Implementations
impl !RefUnwindSafe for ExecutionContext
impl Send for ExecutionContext
impl Sync for ExecutionContext
impl Unpin for ExecutionContext
impl !UnwindSafe for ExecutionContext
Blanket Implementations
sourceimpl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
const: unstable · sourcepub fn borrow_mut(&mut self) -> &mut T
pub fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
sourceimpl<T> ToOwned for T where
T: Clone,
impl<T> ToOwned for T where
T: Clone,
type Owned = T
type Owned = T
The resulting type after obtaining ownership.
sourcepub fn to_owned(&self) -> T
pub fn to_owned(&self) -> T
Creates owned data from borrowed data, usually by cloning. Read more
sourcepub fn clone_into(&self, target: &mut T)
pub fn clone_into(&self, target: &mut T)
(toowned_clone_into) Uses borrowed data to replace owned data, usually by cloning. Read more