Struct datafusion::execution::context::ExecutionContext [−][src]
pub struct ExecutionContext { pub state: Arc<Mutex<ExecutionContextState>>, }
ExecutionContext is the main interface for executing queries with DataFusion. The context provides the following functionality:
- Create a DataFrame from a CSV or Parquet data source.
- Register a CSV or Parquet data source as a table that can be referenced from a SQL query.
- Register a custom data source that can be referenced from a SQL query.
- Execute a SQL query
The following example demonstrates how to use the context to execute a query against a CSV data source using the DataFrame API:
use datafusion::prelude::*; let mut ctx = ExecutionContext::new(); let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?; let df = df.filter(col("a").lt_eq(col("b")))? .aggregate(vec![col("a")], vec![min(col("b"))])? .limit(100)?; let results = df.collect();
The following example demonstrates how to execute the same query using SQL:
use datafusion::prelude::*; let mut ctx = ExecutionContext::new(); ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?; let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?;
Fields
state: Arc<Mutex<ExecutionContextState>>
Internal state for the context
Implementations
impl ExecutionContext
[src]
impl ExecutionContext
[src]pub fn new() -> Self
[src]
Creates a new execution context using a default configuration.
pub fn with_config(config: ExecutionConfig) -> Self
[src]
Creates a new execution context using the provided configuration.
pub fn sql(&mut self, sql: &str) -> Result<Arc<dyn DataFrame>>
[src]
Creates a dataframe that will execute a SQL query.
pub fn create_logical_plan(&self, sql: &str) -> Result<LogicalPlan>
[src]
Creates a logical plan.
This function is intended for internal use and should not be called directly.
pub fn register_variable(
&mut self,
variable_type: VarType,
provider: Arc<dyn VarProvider + Send + Sync>
)
[src]
&mut self,
variable_type: VarType,
provider: Arc<dyn VarProvider + Send + Sync>
)
Registers a variable provider within this context.
pub fn register_udf(&mut self, f: ScalarUDF)
[src]
Registers a scalar UDF within this context.
Note in SQL queries, function names are looked up using lowercase unless the query uses quotes. For example,
SELECT MY_FUNC(x)...
will look for a function named "my_func"
SELECT "my_FUNC"(x)
will look for a function named "my_FUNC"
pub fn register_udaf(&mut self, f: AggregateUDF)
[src]
Registers an aggregate UDF within this context.
Note in SQL queries, aggregate names are looked up using lowercase unless the query uses quotes. For example,
SELECT MY_UDAF(x)...
will look for an aggregate named "my_udaf"
SELECT "my_UDAF"(x)
will look for an aggregate named "my_UDAF"
pub fn read_csv(
&mut self,
filename: &str,
options: CsvReadOptions<'_>
) -> Result<Arc<dyn DataFrame>>
[src]
&mut self,
filename: &str,
options: CsvReadOptions<'_>
) -> Result<Arc<dyn DataFrame>>
Creates a DataFrame for reading a CSV data source.
pub fn read_parquet(&mut self, filename: &str) -> Result<Arc<dyn DataFrame>>
[src]
Creates a DataFrame for reading a Parquet data source.
pub fn read_table(
&mut self,
provider: Arc<dyn TableProvider>
) -> Result<Arc<dyn DataFrame>>
[src]
&mut self,
provider: Arc<dyn TableProvider>
) -> Result<Arc<dyn DataFrame>>
Creates a DataFrame for reading a custom TableProvider.
pub fn register_csv(
&mut self,
name: &str,
filename: &str,
options: CsvReadOptions<'_>
) -> Result<()>
[src]
&mut self,
name: &str,
filename: &str,
options: CsvReadOptions<'_>
) -> Result<()>
Registers a CSV data source so that it can be referenced from SQL statements executed against this context.
pub fn register_parquet(&mut self, name: &str, filename: &str) -> Result<()>
[src]
Registers a Parquet data source so that it can be referenced from SQL statements executed against this context.
pub fn register_catalog(
&self,
name: impl Into<String>,
catalog: Arc<dyn CatalogProvider>
) -> Option<Arc<dyn CatalogProvider>>
[src]
&self,
name: impl Into<String>,
catalog: Arc<dyn CatalogProvider>
) -> Option<Arc<dyn CatalogProvider>>
Registers a named catalog using a custom CatalogProvider so that it can be referenced from SQL statements executed against this context.
Returns the CatalogProvider previously registered for this name, if any.
pub fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>
[src]
Retrieves a CatalogProvider instance by name.
pub fn register_table<'a>(
&'a mut self,
table_ref: impl Into<TableReference<'a>>,
provider: Arc<dyn TableProvider>
) -> Result<Option<Arc<dyn TableProvider>>>
[src]
&'a mut self,
table_ref: impl Into<TableReference<'a>>,
provider: Arc<dyn TableProvider>
) -> Result<Option<Arc<dyn TableProvider>>>
Registers a table using a custom TableProvider so that it can be referenced from SQL statements executed against this context.
Returns the TableProvider previously registered for this reference, if any.
pub fn deregister_table<'a>(
&'a mut self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Option<Arc<dyn TableProvider>>>
[src]
&'a mut self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Option<Arc<dyn TableProvider>>>
Deregisters the given table.
Returns the registered provider, if any
pub fn table<'a>(
&self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Arc<dyn DataFrame>>
[src]
&self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Arc<dyn DataFrame>>
Retrieves a DataFrame representing a table previously registered by calling the register_table function.
Returns an error if no table has been registered with the provided reference.
pub fn tables(&self) -> Result<HashSet<String>>
[src]
Deprecated: please use the catalog provider interface (ExecutionContext::catalog) to examine available catalogs, schemas, and tables.
Returns the set of available tables in the default catalog and schema.
Use the table function to get a specific table.
pub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan>
[src]
Optimizes the logical plan by applying optimizer rules.
pub fn create_physical_plan(
&self,
logical_plan: &LogicalPlan
) -> Result<Arc<dyn ExecutionPlan>>
[src]
&self,
logical_plan: &LogicalPlan
) -> Result<Arc<dyn ExecutionPlan>>
Creates a physical plan from a logical plan.
pub async fn write_csv(
&self,
plan: Arc<dyn ExecutionPlan>,
path: String
) -> Result<()>
[src]
&self,
plan: Arc<dyn ExecutionPlan>,
path: String
) -> Result<()>
Executes a query and writes the results to a partitioned CSV file.
pub async fn write_parquet(
&self,
plan: Arc<dyn ExecutionPlan>,
path: String,
writer_properties: Option<WriterProperties>
) -> Result<()>
[src]
&self,
plan: Arc<dyn ExecutionPlan>,
path: String,
writer_properties: Option<WriterProperties>
) -> Result<()>
Executes a query and writes the results to a partitioned Parquet file.
Trait Implementations
impl Clone for ExecutionContext
[src]
impl Clone for ExecutionContext
[src]fn clone(&self) -> ExecutionContext
[src]
pub fn clone_from(&mut self, source: &Self)
1.0.0[src]
impl From<Arc<Mutex<ExecutionContextState>>> for ExecutionContext
[src]
impl From<Arc<Mutex<ExecutionContextState>>> for ExecutionContext
[src]fn from(state: Arc<Mutex<ExecutionContextState>>) -> Self
[src]
Auto Trait Implementations
impl RefUnwindSafe for ExecutionContext
impl RefUnwindSafe for ExecutionContext
impl Send for ExecutionContext
impl Send for ExecutionContext
impl Sync for ExecutionContext
impl Sync for ExecutionContext
impl Unpin for ExecutionContext
impl Unpin for ExecutionContext
impl UnwindSafe for ExecutionContext
impl UnwindSafe for ExecutionContext