Skip to main content

datafusion_common/
format.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::fmt::{self, Display};
19use std::str::FromStr;
20
21use arrow::compute::CastOptions;
22use arrow::util::display::{DurationFormat, FormatOptions};
23
24use crate::config::{ConfigField, Visit};
25use crate::error::{DataFusionError, Result};
26
/// The default [`FormatOptions`] to use within DataFusion.
///
/// Starts from [`FormatOptions::new`] and switches the duration format to
/// [`DurationFormat::Pretty`].
///
/// Also see [`crate::config::FormatOptions`]
pub const DEFAULT_FORMAT_OPTIONS: FormatOptions<'static> =
    FormatOptions::new().with_duration_format(DurationFormat::Pretty);

/// The default [`CastOptions`] to use within DataFusion.
///
/// `safe` is set to `false` and values are formatted with
/// [`DEFAULT_FORMAT_OPTIONS`].
pub const DEFAULT_CAST_OPTIONS: CastOptions<'static> = CastOptions {
    safe: false,
    format_options: DEFAULT_FORMAT_OPTIONS,
};
37
/// Output formats for `EXPLAIN` plans.
///
/// Each variant has a lowercase textual name (`indent`, `tree`, `pgjson`,
/// `graphviz`) that is used both when parsing the format from a string and
/// when displaying it.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ExplainFormat {
    /// Indent mode
    ///
    /// Example:
    /// ```text
    /// > explain format indent select x from values (1) t(x);
    /// +---------------+-----------------------------------------------------+
    /// | plan_type     | plan                                                |
    /// +---------------+-----------------------------------------------------+
    /// | logical_plan  | SubqueryAlias: t                                    |
    /// |               |   Projection: column1 AS x                          |
    /// |               |     Values: (Int64(1))                              |
    /// | physical_plan | ProjectionExec: expr=[column1@0 as x]               |
    /// |               |   DataSourceExec: partitions=1, partition_sizes=[1] |
    /// |               |                                                     |
    /// +---------------+-----------------------------------------------------+
    /// ```
    Indent,
    /// Tree mode
    ///
    /// Example:
    /// ```text
    /// > explain format tree select x from values (1) t(x);
    /// +---------------+-------------------------------+
    /// | plan_type     | plan                          |
    /// +---------------+-------------------------------+
    /// | physical_plan | ┌───────────────────────────┐ |
    /// |               | │       ProjectionExec      │ |
    /// |               | │    --------------------   │ |
    /// |               | │        x: column1@0       │ |
    /// |               | └─────────────┬─────────────┘ |
    /// |               | ┌─────────────┴─────────────┐ |
    /// |               | │       DataSourceExec      │ |
    /// |               | │    --------------------   │ |
    /// |               | │         bytes: 128        │ |
    /// |               | │       format: memory      │ |
    /// |               | │          rows: 1          │ |
    /// |               | └───────────────────────────┘ |
    /// |               |                               |
    /// +---------------+-------------------------------+
    /// ```
    Tree,
    /// Postgres JSON mode
    ///
    /// A displayable structure that produces the plan in PostgreSQL JSON
    /// format.
    ///
    /// Users can use this format to visualize the plan in existing plan
    /// visualization tools, for example [dalibo](https://explain.dalibo.com/)
    ///
    /// Example:
    /// ```text
    /// > explain format pgjson select x from values (1) t(x);
    /// +--------------+--------------------------------------+
    /// | plan_type    | plan                                 |
    /// +--------------+--------------------------------------+
    /// | logical_plan | [                                    |
    /// |              |   {                                  |
    /// |              |     "Plan": {                        |
    /// |              |       "Alias": "t",                  |
    /// |              |       "Node Type": "Subquery",       |
    /// |              |       "Output": [                    |
    /// |              |         "x"                          |
    /// |              |       ],                             |
    /// |              |       "Plans": [                     |
    /// |              |         {                            |
    /// |              |           "Expressions": [           |
    /// |              |             "column1 AS x"           |
    /// |              |           ],                         |
    /// |              |           "Node Type": "Projection", |
    /// |              |           "Output": [                |
    /// |              |             "x"                      |
    /// |              |           ],                         |
    /// |              |           "Plans": [                 |
    /// |              |             {                        |
    /// |              |               "Node Type": "Values", |
    /// |              |               "Output": [            |
    /// |              |                 "column1"            |
    /// |              |               ],                     |
    /// |              |               "Plans": [],           |
    /// |              |               "Values": "(Int64(1))" |
    /// |              |             }                        |
    /// |              |           ]                          |
    /// |              |         }                            |
    /// |              |       ]                              |
    /// |              |     }                                |
    /// |              |   }                                  |
    /// |              | ]                                    |
    /// +--------------+--------------------------------------+
    /// ```
    PostgresJSON,
    /// Graphviz mode
    ///
    /// Example:
    /// ```text
    /// > explain format graphviz select x from values (1) t(x);
    /// +--------------+------------------------------------------------------------------------+
    /// | plan_type    | plan                                                                   |
    /// +--------------+------------------------------------------------------------------------+
    /// | logical_plan |                                                                        |
    /// |              | // Begin DataFusion GraphViz Plan,                                     |
    /// |              | // display it online here: https://dreampuf.github.io/GraphvizOnline   |
    /// |              |                                                                        |
    /// |              | digraph {                                                              |
    /// |              |   subgraph cluster_1                                                   |
    /// |              |   {                                                                    |
    /// |              |     graph[label="LogicalPlan"]                                         |
    /// |              |     2[shape=box label="SubqueryAlias: t"]                              |
    /// |              |     3[shape=box label="Projection: column1 AS x"]                      |
    /// |              |     2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]                |
    /// |              |     4[shape=box label="Values: (Int64(1))"]                            |
    /// |              |     3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]                |
    /// |              |   }                                                                    |
    /// |              |   subgraph cluster_5                                                   |
    /// |              |   {                                                                    |
    /// |              |     graph[label="Detailed LogicalPlan"]                                |
    /// |              |     6[shape=box label="SubqueryAlias: t\nSchema: [x:Int64;N]"]         |
    /// |              |     7[shape=box label="Projection: column1 AS x\nSchema: [x:Int64;N]"] |
    /// |              |     6 -> 7 [arrowhead=none, arrowtail=normal, dir=back]                |
    /// |              |     8[shape=box label="Values: (Int64(1))\nSchema: [column1:Int64;N]"] |
    /// |              |     7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]                |
    /// |              |   }                                                                    |
    /// |              | }                                                                      |
    /// |              | // End DataFusion GraphViz Plan                                        |
    /// |              |                                                                        |
    /// +--------------+------------------------------------------------------------------------+
    /// ```
    Graphviz,
}
168
169/// Implement  parsing strings to `ExplainFormat`
170impl FromStr for ExplainFormat {
171    type Err = DataFusionError;
172
173    fn from_str(format: &str) -> Result<Self, Self::Err> {
174        match format.to_lowercase().as_str() {
175            "indent" => Ok(ExplainFormat::Indent),
176            "tree" => Ok(ExplainFormat::Tree),
177            "pgjson" => Ok(ExplainFormat::PostgresJSON),
178            "graphviz" => Ok(ExplainFormat::Graphviz),
179            _ => Err(DataFusionError::Configuration(format!(
180                "Invalid explain format. Expected 'indent', 'tree', 'pgjson' or 'graphviz'. Got '{format}'"
181            ))),
182        }
183    }
184}
185
186impl Display for ExplainFormat {
187    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
188        let s = match self {
189            ExplainFormat::Indent => "indent",
190            ExplainFormat::Tree => "tree",
191            ExplainFormat::PostgresJSON => "pgjson",
192            ExplainFormat::Graphviz => "graphviz",
193        };
194        write!(f, "{s}")
195    }
196}
197
198impl ConfigField for ExplainFormat {
199    fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
200        v.some(key, self, description)
201    }
202
203    fn set(&mut self, _: &str, value: &str) -> Result<()> {
204        *self = ExplainFormat::from_str(value)?;
205        Ok(())
206    }
207}
208
/// Categorizes metrics so the display layer can choose the desired verbosity.
///
/// The `datafusion.explain.analyze_level` configuration controls which
/// type is shown:
/// - `"dev"` (the default): all metrics are shown.
/// - `"summary"`: only metrics tagged as `Summary` are shown.
///
/// This is orthogonal to [`MetricCategory`], which filters by *what kind*
/// of value a metric represents (rows / bytes / timing).
///
/// # Difference from `EXPLAIN ANALYZE VERBOSE`
///
/// The `VERBOSE` keyword controls whether per-partition metrics are shown
/// (when specified) or aggregated metrics are displayed (when omitted).
/// In contrast, `MetricType` determines which *levels* of metrics are
/// displayed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MetricType {
    /// Common metrics for high-level insights (answering which operator is slow)
    Summary,
    /// Metrics for deep, operator-level introspection, aimed at developers
    Dev,
}
232
233impl MetricType {
234    /// Returns the set of metric types that should be shown for this level.
235    ///
236    /// `Dev` is a superset of `Summary`: when the user selects
237    /// `analyze_level = 'dev'`, both `Summary` and `Dev` metrics are shown.
238    pub fn included_types(self) -> Vec<MetricType> {
239        match self {
240            MetricType::Summary => vec![MetricType::Summary],
241            MetricType::Dev => vec![MetricType::Summary, MetricType::Dev],
242        }
243    }
244}
245
246impl FromStr for MetricType {
247    type Err = DataFusionError;
248
249    fn from_str(s: &str) -> Result<Self, Self::Err> {
250        match s.trim().to_lowercase().as_str() {
251            "summary" => Ok(Self::Summary),
252            "dev" => Ok(Self::Dev),
253            other => Err(DataFusionError::Configuration(format!(
254                "Invalid explain analyze level. Expected 'summary' or 'dev'. Got '{other}'"
255            ))),
256        }
257    }
258}
259
260impl Display for MetricType {
261    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
262        match self {
263            Self::Summary => write!(f, "summary"),
264            Self::Dev => write!(f, "dev"),
265        }
266    }
267}
268
269impl ConfigField for MetricType {
270    fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
271        v.some(key, self, description)
272    }
273
274    fn set(&mut self, _: &str, value: &str) -> Result<()> {
275        *self = MetricType::from_str(value)?;
276        Ok(())
277    }
278}
279
/// Classifies a metric by what it measures.
///
/// This is orthogonal to [`MetricType`] (Summary / Dev), which controls
/// *verbosity*. `MetricCategory` controls *what kind of value* is shown,
/// so that `EXPLAIN ANALYZE` output can be narrowed to only the categories
/// that are useful in a given context.
///
/// In particular this is useful for testing, since metrics differ in their
/// stability across runs:
/// - [`Rows`](Self::Rows) and [`Bytes`](Self::Bytes) depend only on the plan
///   and the data, so they are mostly deterministic across runs (given the same
///   input). Variations can exist, e.g. because of non-deterministic ordering
///   of evaluation between threads.
///   Running with a single target partition often makes these metrics stable
///   enough to assert on in tests.
/// - [`Timing`](Self::Timing) depends on hardware, system load, scheduling,
///   etc., so it varies from run to run even on the same machine.
///
/// [`MetricCategory`] is especially useful in sqllogictest (`.slt`) files:
/// setting `datafusion.explain.analyze_categories = 'rows'` lets a test
/// assert on row-count metrics without sprinkling `<slt:ignore>` over every
/// timing value.
///
/// Metrics that do not declare a category (the default for custom
/// `Count` / `Gauge` metrics) are treated as
/// [`Uncategorized`](Self::Uncategorized) for filtering purposes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MetricCategory {
    /// Row counts and related dimensionless counters: `output_rows`,
    /// `spilled_rows`, `output_batches`, pruning metrics, ratios, etc.
    ///
    /// Mostly deterministic given the same plan and data.
    Rows,
    /// Byte measurements: `output_bytes`, `spilled_bytes`,
    /// `current_memory_usage`, `bytes_scanned`, etc.
    ///
    /// Mostly deterministic given the same plan and data.
    Bytes,
    /// Wall-clock durations and timestamps: `elapsed_compute`,
    /// operator-defined `Time` metrics, `start_timestamp` /
    /// `end_timestamp`, etc.
    ///
    /// **Non-deterministic** — varies across runs even on the same hardware.
    Timing,
    /// Catch-all for metrics that do not fit into [`Rows`](Self::Rows),
    /// [`Bytes`](Self::Bytes), or [`Timing`](Self::Timing).
    ///
    /// Custom `Count` / `Gauge` metrics that are not explicitly assigned
    /// a category are treated as `Uncategorized` for filtering purposes.
    ///
    /// This variant lets users explicitly include or exclude these
    /// metrics, e.g.:
    /// ```sql
    /// SET datafusion.explain.analyze_categories = 'rows, bytes, uncategorized';
    /// ```
    Uncategorized,
}
335
336impl FromStr for MetricCategory {
337    type Err = DataFusionError;
338
339    fn from_str(s: &str) -> Result<Self, Self::Err> {
340        match s.trim().to_lowercase().as_str() {
341            "rows" => Ok(Self::Rows),
342            "bytes" => Ok(Self::Bytes),
343            "timing" => Ok(Self::Timing),
344            "uncategorized" => Ok(Self::Uncategorized),
345            other => Err(DataFusionError::Configuration(format!(
346                "Invalid metric category '{other}'. \
347                 Expected 'rows', 'bytes', 'timing', or 'uncategorized'."
348            ))),
349        }
350    }
351}
352
353impl Display for MetricCategory {
354    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
355        match self {
356            Self::Rows => write!(f, "rows"),
357            Self::Bytes => write!(f, "bytes"),
358            Self::Timing => write!(f, "timing"),
359            Self::Uncategorized => write!(f, "uncategorized"),
360        }
361    }
362}
363
/// Controls which [`MetricCategory`] values are shown in `EXPLAIN ANALYZE`.
///
/// Set via `SET datafusion.explain.analyze_categories = '...'`, where the
/// value is `all`, `none`, or a comma-separated list of category names
/// (for example `'rows, bytes'`).
///
/// See [`MetricCategory`] for the determinism properties that motivate
/// this filter.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub enum ExplainAnalyzeCategories {
    /// Show all metrics regardless of category (the default).
    #[default]
    All,
    /// Show only metrics whose category is in the list.
    /// Metrics with no declared category are treated as
    /// [`Uncategorized`](MetricCategory::Uncategorized) for filtering.
    ///
    /// An **empty** vec means "plan only" — suppress all metrics. It is
    /// written and parsed as `none`.
    Only(Vec<MetricCategory>),
}
382
383impl FromStr for ExplainAnalyzeCategories {
384    type Err = DataFusionError;
385
386    fn from_str(s: &str) -> Result<Self, Self::Err> {
387        let s = s.trim().to_lowercase();
388        match s.as_str() {
389            "all" => Ok(Self::All),
390            "none" => Ok(Self::Only(vec![])),
391            other => {
392                let mut cats = Vec::new();
393                for part in other.split(',') {
394                    cats.push(part.trim().parse::<MetricCategory>()?);
395                }
396                cats.dedup();
397                Ok(Self::Only(cats))
398            }
399        }
400    }
401}
402
403impl Display for ExplainAnalyzeCategories {
404    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
405        match self {
406            Self::All => write!(f, "all"),
407            Self::Only(cats) if cats.is_empty() => write!(f, "none"),
408            Self::Only(cats) => {
409                let mut first = true;
410                for cat in cats {
411                    if !first {
412                        write!(f, ",")?;
413                    }
414                    first = false;
415                    write!(f, "{cat}")?;
416                }
417                Ok(())
418            }
419        }
420    }
421}
422
423impl ConfigField for ExplainAnalyzeCategories {
424    fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
425        v.some(key, self, description)
426    }
427
428    fn set(&mut self, _: &str, value: &str) -> Result<()> {
429        *self = ExplainAnalyzeCategories::from_str(value)?;
430        Ok(())
431    }
432}