Struct datafusion::execution::context::ExecutionContext

pub struct ExecutionContext {
    pub state: Arc<Mutex<ExecutionContextState>>,
}

ExecutionContext is the main interface for executing queries with DataFusion. The context provides the following functionality:

  • Create DataFrame from a CSV or Parquet data source.
  • Register a CSV or Parquet data source as a table that can be referenced from a SQL query.
  • Register a custom data source that can be referenced from a SQL query.
  • Execute a SQL query

The following example demonstrates how to use the context to execute a query against a CSV data source using the DataFrame API:

use datafusion::prelude::*;
let mut ctx = ExecutionContext::new();
let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;
let df = df.filter(col("a").lt_eq(col("b")))?
           .aggregate(vec![col("a")], vec![min(col("b"))])?
           .limit(100)?;
let results = df.collect();

The following example demonstrates how to execute the same query using SQL:

use datafusion::prelude::*;

let mut ctx = ExecutionContext::new();
ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?;
let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?;

Fields

state: Arc<Mutex<ExecutionContextState>>

Internal state for the context

Implementations

impl ExecutionContext[src]

pub fn new() -> Self[src]

Create a new execution context using a default configuration.

pub fn with_config(config: ExecutionConfig) -> Self[src]

Create a new execution context using the provided configuration

pub fn sql(&mut self, sql: &str) -> Result<Arc<dyn DataFrame>>[src]

Executes a SQL query and returns the result as a DataFrame (a set of RecordBatch instances)

pub fn create_logical_plan(&self, sql: &str) -> Result<LogicalPlan>[src]

Creates a logical plan. This function is intended for internal use and should not be called directly.

pub fn register_variable(
    &mut self,
    variable_type: VarType,
    provider: Arc<dyn VarProvider + Send + Sync>
)
[src]

Register a variable

pub fn register_udf(&mut self, f: ScalarUDF)[src]

Register a scalar UDF

pub fn register_udaf(&mut self, f: AggregateUDF)[src]

Register an aggregate UDF

pub fn read_csv(
    &mut self,
    filename: &str,
    options: CsvReadOptions<'_>
) -> Result<Arc<dyn DataFrame>>
[src]

Creates a DataFrame for reading a CSV data source.

pub fn read_parquet(&mut self, filename: &str) -> Result<Arc<dyn DataFrame>>[src]

Creates a DataFrame for reading a Parquet data source.

pub fn read_table(
    &mut self,
    provider: Arc<dyn TableProvider + Send + Sync>
) -> Result<Arc<dyn DataFrame>>
[src]

Creates a DataFrame for reading a custom TableProvider

pub fn register_csv(
    &mut self,
    name: &str,
    filename: &str,
    options: CsvReadOptions<'_>
) -> Result<()>
[src]

Register a CSV data source so that it can be referenced from SQL statements executed against this context.

pub fn register_parquet(&mut self, name: &str, filename: &str) -> Result<()>[src]

Register a Parquet data source so that it can be referenced from SQL statements executed against this context.

pub fn register_table(
    &mut self,
    name: &str,
    provider: Box<dyn TableProvider + Send + Sync>
)
[src]

Register a table using a custom TableProvider so that it can be referenced from SQL statements executed against this context.

pub fn table(&self, table_name: &str) -> Result<Arc<dyn DataFrame>>[src]

Retrieves a DataFrame representing a table previously registered by calling the register_table function. An Err result will be returned if no table has been registered with the provided name.

pub fn tables(&self) -> HashSet<String>[src]

The set of available tables. Use table to get a specific table.

pub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan>[src]

Optimize the logical plan by applying optimizer rules

pub fn create_physical_plan(
    &self,
    logical_plan: &LogicalPlan
) -> Result<Arc<dyn ExecutionPlan>>
[src]

Create a physical plan from a logical plan

pub async fn write_csv(
    &self,
    plan: Arc<dyn ExecutionPlan>,
    path: String
) -> Result<()>
[src]

Execute a query and write the results to a partitioned CSV file

pub async fn write_parquet(
    &self,
    plan: Arc<dyn ExecutionPlan>,
    path: String,
    writer_properties: Option<WriterProperties>
) -> Result<()>
[src]

Execute a query and write the results to a partitioned Parquet file

Trait Implementations

impl From<Arc<Mutex<ExecutionContextState>>> for ExecutionContext[src]

impl FunctionRegistry for ExecutionContext[src]

Auto Trait Implementations

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T> Pointable for T

type Init = T

The type for initializers.

impl<T> Same<T> for T

type Output = T

Should always be Self

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

impl<V, T> VZip<V> for T where
    V: MultiLane<T>,