// datafusion/lib.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#![deny(clippy::allow_attributes)]
19#![doc(
20 html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg",
21 html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg"
22)]
23#![cfg_attr(docsrs, feature(doc_cfg))]
24// Make sure fast / cheap clones on Arc are explicit:
25// https://github.com/apache/datafusion/issues/11143
26//
27// Eliminate unnecessary function calls(some may be not cheap) due to `xxx_or`
28// for performance. Also avoid abusing `xxx_or_else` for readability:
29// https://github.com/apache/datafusion/issues/15802
30#![cfg_attr(
31 not(test),
32 deny(
33 clippy::clone_on_ref_ptr,
34 clippy::or_fun_call,
35 clippy::unnecessary_lazy_evaluations
36 )
37)]
38#![warn(missing_docs, clippy::needless_borrow)]
39// Use `allow` instead of `expect` for test configuration to explicitly
40// disable the lint for all test code rather than expecting violations
41#![cfg_attr(test, allow(clippy::needless_pass_by_value))]
42
43//! [DataFusion] is an extensible query engine written in Rust that
44//! uses [Apache Arrow] as its in-memory format. DataFusion's target users are
45//! developers building fast and feature rich database and analytic systems,
46//! customized to particular workloads. Please see the [DataFusion website] for
47//! additional documentation, [use cases] and examples.
48//!
49//! "Out of the box," DataFusion offers [SQL] and [`DataFrame`] APIs,
50//! excellent [performance], built-in support for CSV, Parquet, JSON, and Avro,
51//! extensive customization, and a great community.
52//! [Python Bindings] are also available.
53//!
54//! DataFusion features a full query planner, a columnar, streaming, multi-threaded,
55//! vectorized execution engine, and partitioned data sources. You can
56//! customize DataFusion at almost all points including additional data sources,
57//! query languages, functions, custom operators and more.
58//! See the [Architecture] section below for more details.
59//!
60//! [DataFusion]: https://datafusion.apache.org/
61//! [DataFusion website]: https://datafusion.apache.org
62//! [Apache Arrow]: https://arrow.apache.org
63//! [use cases]: https://datafusion.apache.org/user-guide/introduction.html#use-cases
64//! [SQL]: https://datafusion.apache.org/user-guide/sql/index.html
65//! [`DataFrame`]: dataframe::DataFrame
66//! [performance]: https://benchmark.clickhouse.com/
67//! [Python Bindings]: https://github.com/apache/datafusion-python
68//! [Architecture]: #architecture
69//!
70//! # Examples
71//!
72//! The main entry point for interacting with DataFusion is the
73//! [`SessionContext`]. [`Expr`]s represent expressions such as `a + b`.
74//!
75//! [`SessionContext`]: execution::context::SessionContext
76//!
77//! ## DataFrame
78//!
79//! To execute a query against data stored
80//! in a CSV file using a [`DataFrame`]:
81//!
82//! ```rust
83//! # use datafusion::prelude::*;
84//! # use datafusion::error::Result;
85//! # use datafusion::functions_aggregate::expr_fn::min;
86//! # use datafusion::arrow::array::RecordBatch;
87//!
88//! # #[tokio::main]
89//! # async fn main() -> Result<()> {
90//! let ctx = SessionContext::new();
91//!
92//! // create the dataframe
93//! let df = ctx
94//! .read_csv("tests/data/example.csv", CsvReadOptions::new())
95//! .await?;
96//!
97//! // create a plan
98//! let df = df
99//! .filter(col("a").lt_eq(col("b")))?
100//! .aggregate(vec![col("a")], vec![min(col("b"))])?
101//! .limit(0, Some(100))?;
102//!
103//! // execute the plan
104//! let results: Vec<RecordBatch> = df.collect().await?;
105//!
106//! // format the results
107//! let pretty_results =
108//! arrow::util::pretty::pretty_format_batches(&results)?.to_string();
109//!
110//! let expected = vec![
111//! "+---+----------------+",
112//! "| a | min(?table?.b) |",
113//! "+---+----------------+",
114//! "| 1 | 2 |",
115//! "+---+----------------+",
116//! ];
117//!
118//! assert_eq!(pretty_results.trim().lines().collect::<Vec<_>>(), expected);
119//! # Ok(())
120//! # }
121//! ```
122//!
123//! ## SQL
124//!
125//! To execute a query against a CSV file using [SQL]:
126//!
127//! ```
128//! # use datafusion::prelude::*;
129//! # use datafusion::error::Result;
130//! # use datafusion::arrow::array::RecordBatch;
131//!
132//! # #[tokio::main]
133//! # async fn main() -> Result<()> {
134//! let ctx = SessionContext::new();
135//!
136//! ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new())
137//! .await?;
138//!
139//! // create a plan
140//! let df = ctx
141//! .sql("SELECT a, MIN(b) FROM example WHERE a <= b GROUP BY a LIMIT 100")
142//! .await?;
143//!
144//! // execute the plan
145//! let results: Vec<RecordBatch> = df.collect().await?;
146//!
147//! // format the results
148//! let pretty_results =
149//! arrow::util::pretty::pretty_format_batches(&results)?.to_string();
150//!
151//! let expected = vec![
152//! "+---+----------------+",
153//! "| a | min(example.b) |",
154//! "+---+----------------+",
155//! "| 1 | 2 |",
156//! "+---+----------------+",
157//! ];
158//!
159//! assert_eq!(pretty_results.trim().lines().collect::<Vec<_>>(), expected);
160//! # Ok(())
161//! # }
162//! ```
163//!
164//! ## More Examples
165//!
166//! There are many additional annotated examples of using DataFusion in the [datafusion-examples] directory.
167//!
168//! [datafusion-examples]: https://github.com/apache/datafusion/tree/main/datafusion-examples
169//!
170//! # Architecture
171//!
172//! <!-- NOTE: The goal of this section is to provide a high level
173//! overview of how DataFusion is organized and then link to other
174//! sections of the docs with more details -->
175//!
176//! You can find a formal description of DataFusion's architecture in our
177//! [SIGMOD 2024 Paper].
178//!
179//! [SIGMOD 2024 Paper]: https://dl.acm.org/doi/10.1145/3626246.3653368
180//!
181//! ## Design Goals
182//! DataFusion's Architecture Goals are:
183//!
184//! 1. Work “out of the box”: Provide a very fast, world class query engine with
185//! minimal setup or required configuration.
186//!
187//! 2. Customizable everything: All behavior should be customizable by
188//! implementing traits.
189//!
190//! 3. Architecturally boring 🥱: Follow industrial best practice rather than
191//! trying cutting edge, but unproven, techniques.
192//!
193//! With these principles, users start with a basic, high-performance engine
194//! and specialize it over time to suit their needs and available engineering
195//! capacity.
196//!
197//! ## Overview Presentations
198//!
199//! The following presentations offer high level overviews of the
200//! different components and how they interact together.
201//!
202//! - [Apr 2023]: The Apache DataFusion Architecture talks
203//! - _Query Engine_: [recording](https://youtu.be/NVKujPxwSBA) and [slides](https://docs.google.com/presentation/d/1D3GDVas-8y0sA4c8EOgdCvEjVND4s2E7I6zfs67Y4j8/edit#slide=id.p)
204//! - _Logical Plan and Expressions_: [recording](https://youtu.be/EzZTLiSJnhY) and [slides](https://docs.google.com/presentation/d/1ypylM3-w60kVDW7Q6S99AHzvlBgciTdjsAfqNP85K30)
205//! - _Physical Plan and Execution_: [recording](https://youtu.be/2jkWU3_w6z0) and [slides](https://docs.google.com/presentation/d/1cA2WQJ2qg6tx6y4Wf8FH2WVSm9JQ5UgmBWATHdik0hg)
206//! - [July 2022]: DataFusion and Arrow: Supercharge Your Data Analytical Tool with a Rusty Query Engine: [recording](https://www.youtube.com/watch?v=Rii1VTn3seQ) and [slides](https://docs.google.com/presentation/d/1q1bPibvu64k2b7LPi7Yyb0k3gA1BiUYiUbEklqW1Ckc/view#slide=id.g11054eeab4c_0_1165)
207//! - [March 2021]: The DataFusion architecture is described in _Query Engine Design and the Rust-Based DataFusion in Apache Arrow_: [recording](https://www.youtube.com/watch?v=K6eCAVEk4kU) (DataFusion content starts [~ 15 minutes in](https://www.youtube.com/watch?v=K6eCAVEk4kU&t=875s)) and [slides](https://www.slideshare.net/influxdata/influxdb-iox-tech-talks-query-engine-design-and-the-rustbased-datafusion-in-apache-arrow-244161934)
208//! - [February 2021]: How DataFusion is used within the Ballista Project is described in _Ballista: Distributed Compute with Rust and Apache Arrow_: [recording](https://www.youtube.com/watch?v=ZZHQaOap9pQ)
209//!
210//! ## Customization and Extension
211//!
212//! DataFusion is designed to be highly extensible, so you can
213//! start with a working, full featured engine, and then
214//! specialize any behavior for your use case. For example,
215//! some projects may add custom [`ExecutionPlan`] operators, or create their own
216//! query language that directly creates [`LogicalPlan`] rather than using the
217//! built in SQL planner, [`SqlToRel`].
218//!
219//! In order to achieve this, DataFusion supports extension at many points:
220//!
221//! * read from any datasource ([`TableProvider`])
222//! * define your own catalogs, schemas, and table lists ([`catalog`] and [`CatalogProvider`])
223//! * build your own query language or plans ([`LogicalPlanBuilder`])
224//! * declare and use user-defined functions ([`ScalarUDF`], and [`AggregateUDF`], [`WindowUDF`])
225//! * add custom plan rewrite passes ([`AnalyzerRule`], [`OptimizerRule`] and [`PhysicalOptimizerRule`])
226//! * extend the planner to use user-defined logical and physical nodes ([`QueryPlanner`])
227//!
228//! You can find examples of each of them in the [datafusion-examples] directory.
229//!
230//! [`TableProvider`]: crate::datasource::TableProvider
231//! [`CatalogProvider`]: crate::catalog::CatalogProvider
232//! [`LogicalPlanBuilder`]: datafusion_expr::logical_plan::builder::LogicalPlanBuilder
233//! [`ScalarUDF`]: crate::logical_expr::ScalarUDF
234//! [`AggregateUDF`]: crate::logical_expr::AggregateUDF
235//! [`WindowUDF`]: crate::logical_expr::WindowUDF
236//! [`QueryPlanner`]: execution::context::QueryPlanner
237//! [`OptimizerRule`]: datafusion_optimizer::optimizer::OptimizerRule
238//! [`AnalyzerRule`]: datafusion_optimizer::analyzer::AnalyzerRule
239//! [`PhysicalOptimizerRule`]: datafusion_physical_optimizer::PhysicalOptimizerRule
240//!
241//! ## Query Planning and Execution Overview
242//!
243//! ### SQL
244//!
245//! ```text
246//! Parsed with SqlToRel creates
247//! sqlparser initial plan
248//! βββββββββββββββββ βββββββββββ βββββββββββββββ
249//! β SELECT * β βQuery { β βProject β
250//! β FROM ... ββββββββββββΆβ.. ββββββββββββββΆβ TableScan β
251//! β β β} β β ... β
252//! βββββββββββββββββ βββββββββββ βββββββββββββββ
253//!
254//! SQL String sqlparser LogicalPlan
255//! AST nodes
256//! ```
257//!
258//! 1. The query string is parsed to an Abstract Syntax Tree (AST)
259//! [`Statement`] using [sqlparser].
260//!
261//! 2. The AST is converted to a [`LogicalPlan`] and logical expressions
262//! [`Expr`]s to compute the desired result by [`SqlToRel`]. This phase
263//! also includes name and type resolution ("binding").
264//!
265//! [`Statement`]: https://docs.rs/sqlparser/latest/sqlparser/ast/enum.Statement.html
266//!
267//! ### DataFrame
268//!
269//! When executing plans using the [`DataFrame`] API, the process is
270//! identical as with SQL, except the DataFrame API builds the
271//! [`LogicalPlan`] directly using [`LogicalPlanBuilder`]. Systems
272//! that have their own custom query languages typically also build
273//! [`LogicalPlan`] directly.
274//!
275//! ### Planning
276//!
277//! ```text
278//! AnalyzerRules and PhysicalPlanner PhysicalOptimizerRules
279//! OptimizerRules creates ExecutionPlan improve performance
280//! rewrite plan
281//! βββββββββββββββ βββββββββββββββ βββββββββββββββββββ βββββββββββββββββββ
282//! βProject β βProject(x, y)β βProjectExec β βProjectExec β
283//! β TableScan βββ...βββΆβ TableScan βββββββΆβ ... βββ...βββΆβ ... β
284//! β ... β β ... β β DataSourceExecβ β DataSourceExecβ
285//! βββββββββββββββ βββββββββββββββ βββββββββββββββββββ βββββββββββββββββββ
286//!
287//! LogicalPlan LogicalPlan ExecutionPlan ExecutionPlan
288//! ```
289//!
290//! To process large datasets with many rows as efficiently as
291//! possible, significant effort is spent planning and
292//! optimizing, in the following manner:
293//!
294//! 1. The [`LogicalPlan`] is checked and rewritten to enforce
295//! semantic rules, such as type coercion, by [`AnalyzerRule`]s
296//!
297//! 2. The [`LogicalPlan`] is rewritten by [`OptimizerRule`]s, such as
298//! projection and filter pushdown, to improve its efficiency.
299//!
300//! 3. The [`LogicalPlan`] is converted to an [`ExecutionPlan`] by a
301//! [`PhysicalPlanner`]
302//!
303//! 4. The [`ExecutionPlan`] is rewritten by
304//! [`PhysicalOptimizerRule`]s, such as sort and join selection, to
305//! improve its efficiency.
306//!
307//! ## Data Sources
308//!
309//! ```text
310//! Planning β
311//! requests β TableProvider::scan
312//! information β creates an
313//! such as schema β ExecutionPlan
314//! β
315//! βΌ
316//! βββββββββββββββββββββββββββ βββββββββββββββββ
317//! β β β β
318//! βimpl TableProvider ββββββββββΆβDataSourceExec β
319//! β β β β
320//! βββββββββββββββββββββββββββ βββββββββββββββββ
321//! TableProvider
322//! (built in or user provided) ExecutionPlan
323//! ```
324//!
325//! A [`TableProvider`] provides information for planning and
326//! an [`ExecutionPlan`] for execution. DataFusion includes two built-in
327//! table providers that support common file formats and require no runtime services,
328//! [`ListingTable`] and [`MemTable`]. You can add support for any other data
329//! source and/or file formats by implementing the [`TableProvider`] trait.
330//!
331//! See also:
332//!
333//! 1. [`ListingTable`]: Reads data from one or more Parquet, JSON, CSV, or AVRO
334//! files in one or more local or remote directories. Supports HIVE style
335//! partitioning, optional compression, directly reading from remote
336//! object store, file metadata caching, and more.
337//!
338//! 2. [`MemTable`]: Reads data from in memory [`RecordBatch`]es.
339//!
340//! 3. [`StreamingTable`]: Reads data from potentially unbounded inputs.
341//!
342//! [`ListingTable`]: crate::datasource::listing::ListingTable
343//! [`MemTable`]: crate::datasource::memory::MemTable
344//! [`StreamingTable`]: crate::catalog::streaming::StreamingTable
345//!
346//! ## Plan Representations
347//!
348//! ### Logical Plans
349//! Logical planning yields [`LogicalPlan`] nodes and [`Expr`]
350//! representing expressions which are [`Schema`] aware and represent statements
351//! independent of how they are physically executed.
352//! A [`LogicalPlan`] is a Directed Acyclic Graph (DAG) of other
353//! [`LogicalPlan`]s, each potentially containing embedded [`Expr`]s.
354//!
355//! [`LogicalPlan`]s can be rewritten with [`TreeNode`] API, see the
356//! [`tree_node module`] for more details.
357//!
358//! [`Expr`]s can also be rewritten with [`TreeNode`] API and simplified using
359//! [`ExprSimplifier`]. Examples of working with and executing [`Expr`]s can be
360//! found in the [`expr_api`.rs] example
361//!
362//! [`TreeNode`]: datafusion_common::tree_node::TreeNode
363//! [`tree_node module`]: datafusion_expr::logical_plan::tree_node
364//! [`ExprSimplifier`]: crate::optimizer::simplify_expressions::ExprSimplifier
365//! [`expr_api`.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/query_planning/expr_api.rs
366//!
367//! ### Physical Plans
368//!
369//! An [`ExecutionPlan`] (sometimes referred to as a "physical plan")
370//! is a plan that can be executed against data. It is a DAG of other
371//! [`ExecutionPlan`]s each potentially containing expressions that implement the
372//! [`PhysicalExpr`] trait.
373//!
374//! Compared to a [`LogicalPlan`], an [`ExecutionPlan`] has additional concrete
375//! information about how to perform calculations (e.g. hash vs merge
376//! join), and how data flows during execution (e.g. partitioning and
377//! sortedness).
378//!
379//! [cp_solver] performs range propagation analysis on [`PhysicalExpr`]s and
380//! [`PruningPredicate`] can prove certain boolean [`PhysicalExpr`]s used for
381//! filtering can never be `true` using additional statistical information.
382//!
383//! [cp_solver]: crate::physical_expr::intervals::cp_solver
384//! [`PruningPredicate`]: datafusion_physical_optimizer::pruning::PruningPredicate
385//! [`PhysicalExpr`]: crate::physical_plan::PhysicalExpr
386//!
387//! ## Execution
388//!
389//! ```text
390//! ExecutionPlan::execute Calling next() on the
391//! produces a stream stream produces the data
392//!
393//! ββββββββββββββββββ βββββββββββββββββββββββββββ ββββββββββββββ
394//! βProjectExec β βimpl β βββββΆβRecordBatch β
395//! β ... βββββββΆβSendableRecordBatchStreamββββββ€ ββββββββββββββ
396//! β DataSourceExecβ β β β ββββββββββββββ
397//! ββββββββββββββββββ βββββββββββββββββββββββββββ βββββΆβRecordBatch β
398//! β² β ββββββββββββββ
399//! ExecutionPlan β β ...
400//! β β
401//! β β ββββββββββββββ
402//! PhysicalOptimizerRules βββββΆβRecordBatch β
403//! request information β ββββββββββββββ
404//! such as partitioning β β β β β β β β
405//! βββββΆ None β
406//! β β β β β β β
407//! ```
408//!
409//! [`ExecutionPlan`]s process data using the [Apache Arrow] memory
410//! format, making heavy use of functions from the [arrow]
411//! crate. Values are represented with [`ColumnarValue`], which are either
412//! [`ScalarValue`] (single constant values) or [`ArrayRef`] (Arrow
413//! Arrays).
414//!
415//! Calling [`execute`] produces 1 or more partitions of data,
416//! as a [`SendableRecordBatchStream`], which implements a pull based execution
417//! API. Calling [`next()`]`.await` will incrementally compute and return the next
418//! [`RecordBatch`]. Balanced parallelism is achieved using [Volcano style]
419//! "Exchange" operations implemented by [`RepartitionExec`].
420//!
421//! While some recent research such as [Morsel-Driven Parallelism] describes challenges
422//! with the pull style Volcano execution model on NUMA architectures, in practice DataFusion achieves
423//! similar scalability as systems that use push driven schedulers [such as DuckDB].
424//! See the [DataFusion paper in SIGMOD 2024] for more details.
425//!
426//! [`execute`]: physical_plan::ExecutionPlan::execute
427//! [`SendableRecordBatchStream`]: crate::physical_plan::SendableRecordBatchStream
428//! [`ColumnarValue`]: datafusion_expr::ColumnarValue
429//! [`ScalarValue`]: crate::scalar::ScalarValue
430//! [`ArrayRef`]: arrow::array::ArrayRef
431//! [`Stream`]: futures::stream::Stream
432//!
433//! See the [implementors of `ExecutionPlan`] for a list of physical operators available.
434//!
435//! [`RepartitionExec`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/repartition/struct.RepartitionExec.html
436//! [Volcano style]: https://doi.org/10.1145/93605.98720
437//! [Morsel-Driven Parallelism]: https://db.in.tum.de/~leis/papers/morsels.pdf
438//! [DataFusion paper in SIGMOD 2024]: https://github.com/apache/datafusion/files/15149988/DataFusion_Query_Engine___SIGMOD_2024-FINAL-mk4.pdf
439//! [such as DuckDB]: https://github.com/duckdb/duckdb/issues/1583
440//! [implementors of `ExecutionPlan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#implementors
441//!
442//! ## Streaming Execution
443//!
444//! DataFusion is a "streaming" query engine which means [`ExecutionPlan`]s incrementally
445//! read from their input(s) and compute output one [`RecordBatch`] at a time
446//! by continually polling [`SendableRecordBatchStream`]s. Output and
447//! intermediate [`RecordBatch`]s each have approximately `batch_size` rows,
448//! which amortizes per-batch overhead of execution.
449//!
450//! Note that certain operations, sometimes called "pipeline breakers",
451//! (for example full sorts or hash aggregations) are fundamentally non streaming and
452//! must read their input fully before producing **any** output. As much as possible,
453//! other operators read a single [`RecordBatch`] from their input to produce a
454//! single [`RecordBatch`] as output.
455//!
456//! For example, given this SQL:
457//!
458//! ```sql
459//! SELECT name FROM 'data.parquet' WHERE id > 10
460//! ```
461//!
462//! A simplified DataFusion execution plan is shown below. It first reads
463//! data from the Parquet file, then applies the filter, then the projection,
464//! and finally produces output. Each step processes one [`RecordBatch`] at a
465//! time. Multiple batches are processed concurrently on different CPU cores
466//! for plans with multiple partitions.
467//!
468//! ```text
469//! βββββββββββββββ ββββββββββββββββ ββββββββββββββββββ ββββββββββββββββββββ ββββββββββββ
470//! β Parquet βββββΆβ DataSource βββββΆβ FilterExec βββββΆβ ProjectionExec βββββΆβ Results β
471//! β File β β β β β β β β β
472//! βββββββββββββββ ββββββββββββββββ ββββββββββββββββββ ββββββββββββββββββββ ββββββββββββ
473//! (reads data) (id > 10) (keeps "name" col)
474//! RecordBatch ββββΆ RecordBatch βββββΆ RecordBatch βββββΆ RecordBatch
475//! ```
476//!
477//! DataFusion uses the classic "pull" based control flow (explained more in the
478//! next section) to implement streaming execution. As an example,
479//! consider the following SQL query:
480//!
481//! ```sql
482//! SELECT date_trunc('month', time) FROM data WHERE id IN (10,20,30);
483//! ```
484//!
485//! The diagram below shows the call sequence when a consumer calls [`next()`] to
486//! get the next [`RecordBatch`] of output. While it is possible that some
487//! steps run on different threads, typically tokio will use the same thread
488//! that called [`next()`] to read from the input, apply the filter, and
489//! return the results without interleaving any other operations. This results
490//! in excellent cache locality as the same CPU core that produces the data often
491//! consumes it immediately as well.
492//!
493//! ```text
494//!
495//! Step 3: FilterExec calls next() Step 2: ProjectionExec calls
496//! on input Stream next() on input Stream
497//! β β β β β β β β β β β β β β β β β β β β β β β β β
498//! β Step 1: Consumer
499//! βΌ βΌ β calls next()
500//! ββββββββββββββββββ βββββββ»ββββββββββββββ ββββββββββββββββββββββββββ
501//! β β β β β β β β β β β β β β β β β
502//! β DataSource β β β β β
503//! β (e.g. β β FilterExec β β ProjectionExec β
504//! β ParquetSource) β βid IN (10, 20, 30) β βdate_bin('month', time) β
505//! β β β β β β£ β β β β β β β β β β βΆ
506//! β β β β β β
507//! ββββββββββββββββββ βββββββββββββ³ββββββββ ββββββββββββββββββββββββββ
508//! β β² β² Step 6: ProjectionExec
509//! β β β computes date_trunc into a
510//! β β β β β β β β β β β β β β β β β β β β β β β β new RecordBatch returned
511//! βββββββββββββββββββββββ βββββββββββββββ from client
512//! β RecordBatch β β RecordBatch β
513//! βββββββββββββββββββββββ βββββββββββββββ
514//!
515//! Step 4: DataSource returns a Step 5: FilterExec returns a new
516//! single RecordBatch RecordBatch with only matching rows
517//! ```
518//!
519//! [`next()`]: futures::StreamExt::next
520//!
521//! ## Thread Scheduling, CPU / IO Thread Pools, and [Tokio] [`Runtime`]s
522//!
523//! DataFusion automatically runs each plan with multiple CPU cores using
524//! a [Tokio] [`Runtime`] as a thread pool. While tokio is most commonly used
525//! for asynchronous network I/O, the combination of an efficient, work-stealing
526//! scheduler, and first class compiler support for automatic continuation
527//! generation (`async`) also makes it a compelling choice for CPU intensive
528//! applications as explained in the [Using Rustlangβs Async Tokio
529//! Runtime for CPU-Bound Tasks] blog.
530//!
531//! The number of cores used is determined by the `target_partitions`
532//! configuration setting, which defaults to the number of CPU cores.
533//! While preparing for execution, DataFusion tries to create this many distinct
534//! `async` [`Stream`]s for each [`ExecutionPlan`].
535//! The [`Stream`]s for certain [`ExecutionPlan`]s, such as [`RepartitionExec`]
536//! and [`CoalescePartitionsExec`], spawn [Tokio] [`task`]s, that run on
537//! threads managed by the [`Runtime`].
538//! Many DataFusion [`Stream`]s perform CPU intensive processing.
539//!
540//! ### Cooperative Scheduling
541//!
542//! DataFusion uses cooperative scheduling, which means that each [`Stream`]
543//! is responsible for yielding control back to the [`Runtime`] after
544//! some amount of work is done. Please see the [`coop`] module documentation
545//! for more details.
546//!
547//! [`coop`]: datafusion_physical_plan::coop
548//!
549//! ### Network I/O and CPU intensive tasks
550//!
551//! Using `async` for CPU intensive tasks makes it easy for [`TableProvider`]s
552//! to perform network I/O using standard Rust `async` during execution.
553//! However, this design also makes it very easy to mix CPU intensive and latency
554//! sensitive I/O work on the same thread pool ([`Runtime`]).
555//! Using the same (default) [`Runtime`] is convenient, and often works well for
556//! initial development and processing local files, but it can lead to problems
557//! under load and/or when reading from network sources such as AWS S3.
558//!
559//! ### Optimizing Latency: Throttled CPU / IO under Highly Concurrent Load
560//!
561//! If your system does not fully utilize either the CPU or network bandwidth
562//! during execution, or you see significantly higher tail (e.g. p99) latencies
563//! responding to network requests, **it is likely you need to use a different
564//! [`Runtime`] for DataFusion plans**. The [thread_pools example]
565//! has an example of how to do so.
566//!
567//! As shown below, using the same [`Runtime`] for both CPU intensive processing
568//! and network requests can introduce significant delays in responding to
569//! those network requests. Delays in processing network requests can and does
570//! lead network flow control to throttle the available bandwidth in response.
571//! This effect can be especially pronounced when running multiple queries
572//! concurrently.
573//!
574//! ```text
575//! Legend
576//!
577//! ββββββββ
578//! Processing network request β β CPU bound work
579//! is delayed due to processing ββββββββ
580//! CPU bound work βββ
581//! β β Network request
582//! ββ βββ processing
583//!
584//! ββ
585//! β β β β β β β β β β β β β β β β β β β β β β
586//! β β
587//!
588//! βΌ βΌ
589//! βββββββββββββββ βββββββββββββββββββββββββββββββββββββββββββββββββββ
590//! β βthread 1 β ββ ββ Decoding ββ Filtering ββ β
591//! β β βββββββββββββββββββββββββββββββββββββββββββββββββββ
592//! β β ββββββββββββββββ³ββββββββββββββββββββ³βββββββββββββββ
593//! βTokio Runtimeβthread 2 β Decoding β Filtering β Decoding β ...
594//! β(thread pool)β ββββββββββββββββ»ββββββββββββββββββββ»βββββββββββββββ
595//! β β ... ...
596//! β β βββββββββββββββββββββ³βββββββββββββββββββββββ ββββββββββββββββ
597//! β βthread N β Decoding β Filtering ββ β β Decoding β
598//! βββββββββββββββ βββββββββββββββββββββ»βββββββββββββββββββββββ ββββββββββββββββ
599//! ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββΆ
600//! time
601//! ```
602//!
603//! The bottleneck resulting from network throttling can be avoided
604//! by using separate [`Runtime`]s for the different types of work, as shown
605//! in the diagram below.
606//!
607//! ```text
608//! A separate thread pool processes network Legend
609//! requests, reducing the latency for
610//! processing each request ββββββββ
611//! β β CPU bound work
612//! β ββββββββ
613//! β βββ
614//! β β β β β β β β Network request
615//! β β β β β βββ processing
616//! β
617//! βΌ βΌ
618//! βββββββββββββββ βββββββββ
619//! β βthread 1 β ββ ββ β
620//! β β βββββββββ
621//! βTokio Runtimeβ ...
622//! β(thread pool)βthread 2
623//! β β
624//! β"IO Runtime" β ...
625//! β β βββ
626//! β βthread N β β
627//! βββββββββββββββ βββ
628//! ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββΆ
629//! time
630//!
631//! βββββββββββββββ ββββββββββββββββββββββββββββββββββββββββββ
632//! β βthread 1 β Decoding ββ Filtering β
633//! β β ββββββββββββββββββββββββββββββββββββββββββ
634//! βTokio Runtimeβ ββββββββββββββββ³ββββββββββββββββββββ³βββββββββββββββ
635//! β(thread pool)βthread 2 β Decoding β Filtering β Decoding β ...
636//! β β ββββββββββββββββ»ββββββββββββββββββββ»βββββββββββββββ
637//! β CPU Runtime β ... ...
638//! β β βββββββββββββββββββββ³ββββββββββββββββββββ³βββββββββββββββ
639//! β βthread N β Decoding β Filtering β Decoding β
640//! βββββββββββββββ βββββββββββββββββββββ»ββββββββββββββββββββ»βββββββββββββββ
641//! ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββΆ
642//! time
643//! ```
644//!
645//! Note that DataFusion does not use [`tokio::task::spawn_blocking`] for
646//! CPU-bounded work, because `spawn_blocking` is designed for blocking **IO**,
647//! not for CPU bound tasks. Among other challenges, spawned blocking
648//! tasks can't yield waiting for input (can't call `await`) so they
649//! can't be used to limit the number of concurrent CPU bound tasks or
650//! keep the processing pipeline to the same core.
651//!
652//! [Tokio]: https://tokio.rs
653//! [`Runtime`]: tokio::runtime::Runtime
654//! [thread_pools example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/query_planning/thread_pools.rs
655//! [`task`]: tokio::task
656//! [Using Rustlangβs Async Tokio Runtime for CPU-Bound Tasks]: https://thenewstack.io/using-rustlangs-async-tokio-runtime-for-cpu-bound-tasks/
657//! [`RepartitionExec`]: physical_plan::repartition::RepartitionExec
658//! [`CoalescePartitionsExec`]: physical_plan::coalesce_partitions::CoalescePartitionsExec
659//!
660//! ## State Management and Configuration
661//!
662//! [`ConfigOptions`] contain options to control DataFusion's
663//! execution.
664//!
665//! [`ConfigOptions`]: datafusion_common::config::ConfigOptions
666//!
667//! The state required to execute queries is managed by the following
668//! structures:
669//!
670//! 1. [`SessionContext`]: State needed to create [`LogicalPlan`]s such
671//! as the table definitions and the function registries.
672//!
673//! 2. [`TaskContext`]: State needed for execution such as the
674//! [`MemoryPool`], [`DiskManager`], and [`ObjectStoreRegistry`].
675//!
676//! 3. [`ExecutionProps`]: Per-execution properties and data (such as
677//! starting timestamps, etc).
678//!
679//! [`SessionContext`]: crate::execution::context::SessionContext
680//! [`TaskContext`]: crate::execution::context::TaskContext
681//! [`ExecutionProps`]: crate::execution::context::ExecutionProps
682//!
683//! ### Resource Management
684//!
685//! The amount of memory and temporary local disk space used by
686//! DataFusion when running a plan can be controlled using the
687//! [`MemoryPool`] and [`DiskManager`]. Other runtime options can be
688//! found on [`RuntimeEnv`].
689//!
690//! [`DiskManager`]: crate::execution::DiskManager
691//! [`MemoryPool`]: crate::execution::memory_pool::MemoryPool
692//! [`RuntimeEnv`]: crate::execution::runtime_env::RuntimeEnv
693//! [`ObjectStoreRegistry`]: crate::datasource::object_store::ObjectStoreRegistry
694//!
695//! ## Crate Organization
696//!
697//! Most users interact with DataFusion via this crate (`datafusion`), which re-exports
698//! all functionality needed to build and execute queries.
699//!
700//! There are three other crates that provide additional functionality that
701//! must be used directly:
702//! * [`datafusion_proto`]: Plan serialization and deserialization
703//! * [`datafusion_substrait`]: Support for the substrait plan serialization format
//! * [`datafusion_sqllogictest`]: The DataFusion SQL logic test runner
705//!
706//! [`datafusion_proto`]: https://crates.io/crates/datafusion-proto
707//! [`datafusion_substrait`]: https://crates.io/crates/datafusion-substrait
708//! [`datafusion_sqllogictest`]: https://crates.io/crates/datafusion-sqllogictest
709//!
710//! DataFusion is internally split into multiple sub crates to
711//! enforce modularity and improve compilation times. See the
712//! [list of modules](#modules) for all available sub-crates. Major ones are
713//!
714//! * [datafusion_common]: Common traits and types
715//! * [datafusion_catalog]: Catalog APIs such as [`SchemaProvider`] and [`CatalogProvider`]
716//! * [datafusion_datasource]: File and Data IO such as [`FileSource`] and [`DataSink`]
717//! * [datafusion_session]: [`Session`] and related structures
718//! * [datafusion_execution]: State and structures needed for execution
719//! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structure
720//! * [datafusion_functions]: Scalar function packages
721//! * [datafusion_functions_aggregate]: Aggregate functions such as `MIN`, `MAX`, `SUM`, etc
722//! * [datafusion_functions_nested]: Scalar function packages for `ARRAY`s, `MAP`s and `STRUCT`s
723//! * [datafusion_functions_table]: Table Functions such as `GENERATE_SERIES`
724//! * [datafusion_functions_window]: Window functions such as `ROW_NUMBER`, `RANK`, etc
725//! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s
726//! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions
//! * [datafusion_physical_plan]: [`ExecutionPlan`] and related physical plan structures
//! * [datafusion_physical_optimizer]: [`PhysicalOptimizerRule`]s that rewrite [`ExecutionPlan`]s
729//! * [datafusion_sql]: SQL planner ([`SqlToRel`])
730//!
731//! [`SchemaProvider`]: datafusion_catalog::SchemaProvider
732//! [`CatalogProvider`]: datafusion_catalog::CatalogProvider
733//! [`Session`]: datafusion_session::Session
734//! [`FileSource`]: datafusion_datasource::file::FileSource
735//! [`DataSink`]: datafusion_datasource::sink::DataSink
736//!
737//! ## Citing DataFusion in Academic Papers
738//!
739//! You can use the following citation to reference DataFusion in academic papers:
740//!
741//! ```text
//! @inproceedings{lamb2024apache,
743//! title={Apache Arrow DataFusion: A Fast, Embeddable, Modular Analytic Query Engine},
744//! author={Lamb, Andrew and Shen, Yijie and Heres, Dani{\"e}l and Chakraborty, Jayjeet and Kabak, Mehmet Ozan and Hsieh, Liang-Chi and Sun, Chao},
745//! booktitle={Companion of the 2024 International Conference on Management of Data},
746//! pages={5--17},
747//! year={2024}
748//! }
749//! ```
750//!
751//! [sqlparser]: https://docs.rs/sqlparser/latest/sqlparser
752//! [`SqlToRel`]: sql::planner::SqlToRel
753//! [`Expr`]: datafusion_expr::Expr
754//! [`LogicalPlan`]: datafusion_expr::LogicalPlan
755//! [`AnalyzerRule`]: datafusion_optimizer::analyzer::AnalyzerRule
756//! [`OptimizerRule`]: optimizer::optimizer::OptimizerRule
757//! [`ExecutionPlan`]: physical_plan::ExecutionPlan
758//! [`PhysicalPlanner`]: physical_planner::PhysicalPlanner
759//! [`PhysicalOptimizerRule`]: datafusion_physical_optimizer::PhysicalOptimizerRule
760//! [`Schema`]: arrow::datatypes::Schema
761//! [`PhysicalExpr`]: physical_plan::PhysicalExpr
762//! [`RecordBatch`]: arrow::array::RecordBatch
763//! [`RecordBatchReader`]: arrow::record_batch::RecordBatchReader
764//! [`Array`]: arrow::array::Array
765
/// DataFusion crate version, captured from `CARGO_PKG_VERSION` at compile
/// time (e.g. `"43.0.0"`).
pub const DATAFUSION_VERSION: &str = env!("CARGO_PKG_VERSION");
768
// NOTE(review): `extern crate core` is normally implicit on the 2018+
// editions — confirm whether this is still required before removing.
extern crate core;

// The SQL parser is only pulled in when the "sql" feature is enabled.
#[cfg(feature = "sql")]
extern crate sqlparser;
773
// Modules implemented directly in this crate (as opposed to the sub-crate
// re-export modules declared further below).
pub mod dataframe;
pub mod datasource;
pub mod error;
pub mod execution;
pub mod physical_planner;
pub mod prelude;
pub mod scalar;
781
// Re-export dependencies that are part of DataFusion public API (e.g. via DataFusionError)
pub use arrow;
pub use object_store;

// `parquet` is part of the public API only when the "parquet" feature is on.
#[cfg(feature = "parquet")]
pub use parquet;

// Avro support re-exports `apache_avro` from the avro datasource crate.
#[cfg(feature = "avro")]
pub use datafusion_datasource_avro::apache_avro;
791
// re-export DataFusion sub-crates at the top level. Use `pub use *`
// so that the contents of the subcrates appears in rustdocs
// for details, see https://github.com/apache/datafusion/issues/6648

/// re-export of [`datafusion_common`] crate
pub mod common {
    pub use datafusion_common::*;

    /// re-export of [`datafusion_common_runtime`] crate
    pub mod runtime {
        pub use datafusion_common_runtime::*;
    }
}

// Backwards compatibility: `config` was previously reachable at the crate root.
pub use common::config;
808
// NB datafusion execution is re-exported in the `execution` module

/// re-export of [`datafusion_catalog`] crate
pub mod catalog {
    pub use datafusion_catalog::*;
}

/// re-export of [`datafusion_expr`] crate
pub mod logical_expr {
    pub use datafusion_expr::*;
}

/// re-export of [`datafusion_expr_common`] crate
pub mod logical_expr_common {
    pub use datafusion_expr_common::*;
}

/// re-export of [`datafusion_optimizer`] crate
pub mod optimizer {
    pub use datafusion_optimizer::*;
}

/// re-export of [`datafusion_physical_optimizer`] crate
pub mod physical_optimizer {
    pub use datafusion_physical_optimizer::*;
}

/// re-export of [`datafusion_physical_expr_common`] crate
pub mod physical_expr_common {
    pub use datafusion_physical_expr_common::*;
}

/// re-export of [`datafusion_physical_expr`] crate
pub mod physical_expr {
    pub use datafusion_physical_expr::*;
}

/// re-export of [`datafusion_physical_expr_adapter`] crate
pub mod physical_expr_adapter {
    pub use datafusion_physical_expr_adapter::*;
}

/// re-export of [`datafusion_physical_plan`] crate
pub mod physical_plan {
    pub use datafusion_physical_plan::*;
}
855
// Re-export testing macros for backwards compatibility (they are defined in
// `datafusion_common`).
pub use datafusion_common::assert_batches_eq;
pub use datafusion_common::assert_batches_sorted_eq;

/// re-export of [`datafusion_sql`] crate
#[cfg(feature = "sql")]
pub mod sql {
    pub use datafusion_sql::*;
}

/// re-export of [`datafusion_functions`] crate
pub mod functions {
    pub use datafusion_functions::*;
}

/// re-export of [`datafusion_functions_nested`] crate, if "nested_expressions" feature is enabled
pub mod functions_nested {
    // The module itself always exists; it is simply empty when the
    // "nested_expressions" feature is disabled.
    #[cfg(feature = "nested_expressions")]
    pub use datafusion_functions_nested::*;
}

/// re-export of [`datafusion_functions_aggregate`] crate
pub mod functions_aggregate {
    pub use datafusion_functions_aggregate::*;
}

/// re-export of [`datafusion_functions_window`] crate
pub mod functions_window {
    pub use datafusion_functions_window::*;
}

/// re-export of [`datafusion_functions_table`] crate
pub mod functions_table {
    pub use datafusion_functions_table::*;
}

/// re-export of variable provider for `@name` and `@@name` style runtime values.
pub mod variable {
    pub use datafusion_expr::var_provider::{VarProvider, VarType};
}
896
// Test support code; excluded on wasm32 targets.
#[cfg(not(target_arch = "wasm32"))]
pub mod test;

// NOTE(review): private module — presumably compares schemas for logical
// equivalence; confirm in schema_equivalence.rs.
mod schema_equivalence;
pub mod test_util;

// Compile and run the code examples in the repository README as doctests.
#[cfg(doctest)]
doc_comment::doctest!("../../../README.md", readme_example_test);
905
// Instructions for Documentation Examples
//
// The following commands test the examples from the user guide as part of
// `cargo test --doc`
//
// # Adding new tests:
//
// Simply add code like this to your .md file and ensure your md file is
// included in the lists below.
//
// ```rust
// <code here will be tested>
// ```
//
// Note that sometimes it helps to author the doctest as a standalone program
// first, and then copy it into the user guide.
//
// # Debugging Test Failures
//
// Unfortunately, the line numbers reported by doctest do not correspond to the
// line numbers in the .md files. Thus, if a doctest fails, use the name of
// the test to find the relevant file in the list below, and then find the
// example in that file to fix.
//
// For example, if `user_guide_expressions(line 123)` fails,
// go to `docs/source/user-guide/expressions.md` to find the relevant problem.
//
#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/arrow-introduction.md",
    user_guide_arrow_introduction
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/concepts-readings-events.md",
    user_guide_concepts_readings_events
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/configs.md",
    user_guide_configs
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/crate-configuration.md",
    user_guide_crate_configuration
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/dataframe.md",
    user_guide_dataframe
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/example-usage.md",
    user_guide_example_usage
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/explain-usage.md",
    user_guide_explain_usage
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/expressions.md",
    user_guide_expressions
);

#[cfg(doctest)]
doc_comment::doctest!("../../../docs/source/user-guide/faq.md", user_guide_faq);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/introduction.md",
    user_guide_introduction
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/cli/datasources.md",
    user_guide_cli_datasource
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/cli/installation.md",
    user_guide_cli_installation
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/cli/overview.md",
    user_guide_cli_overview
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/cli/usage.md",
    user_guide_cli_usage
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/features.md",
    user_guide_features
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/aggregate_functions.md",
    user_guide_sql_aggregate_functions
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/data_types.md",
    user_guide_sql_data_types
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/ddl.md",
    user_guide_sql_ddl
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/dml.md",
    user_guide_sql_dml
);
1043
// Doctest for the SQL `EXPLAIN` user guide page. The test name previously
// contained a typo (`user_guide_sql_exmplain`); it is an internal test
// identifier with no external callers, so renaming is safe.
#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/explain.md",
    user_guide_sql_explain
);
1049
// Further SQL reference pages tested as doctests.
#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/information_schema.md",
    user_guide_sql_information_schema
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/operators.md",
    user_guide_sql_operators
);
1061
// Doctest for the SQL prepared statements user guide page. Renamed from
// `user_guide_prepared_statements` for consistency with the other
// `user_guide_sql_*` test names; internal identifier, no external callers.
#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/prepared_statements.md",
    user_guide_sql_prepared_statements
);
1067
// Remaining user guide pages, followed by the library user guide and
// contributor guide pages.
#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/scalar_functions.md",
    user_guide_sql_scalar_functions
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/select.md",
    user_guide_sql_select
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/special_functions.md",
    user_guide_sql_special_functions
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/subqueries.md",
    user_guide_sql_subqueries
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/window_functions.md",
    user_guide_sql_window_functions
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/format_options.md",
    user_guide_sql_format_options
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/functions/adding-udfs.md",
    library_user_guide_functions_adding_udfs
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/functions/spark.md",
    library_user_guide_functions_spark
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/building-logical-plans.md",
    library_user_guide_building_logical_plans
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/catalogs.md",
    library_user_guide_catalogs
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/custom-table-providers.md",
    library_user_guide_custom_table_providers
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/extending-operators.md",
    library_user_guide_extending_operators
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/extensions.md",
    library_user_guide_extensions
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/index.md",
    library_user_guide_index
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/profiling.md",
    library_user_guide_profiling
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/query-optimizer.md",
    library_user_guide_query_optimizer
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/using-the-dataframe-api.md",
    library_user_guide_dataframe_api
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/using-the-sql-api.md",
    library_user_guide_sql_api
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/working-with-exprs.md",
    library_user_guide_working_with_exprs
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/upgrading.md",
    library_user_guide_upgrading
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/contributor-guide/api-health.md",
    contributor_guide_api_health
);