datafusion_common/format.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::fmt::{self, Display};
19use std::str::FromStr;
20
21use arrow::compute::CastOptions;
22use arrow::util::display::{DurationFormat, FormatOptions};
23
24use crate::config::{ConfigField, Visit};
25use crate::error::{DataFusionError, Result};
26
27/// The default [`FormatOptions`] to use within DataFusion
28/// Also see [`crate::config::FormatOptions`]
29pub const DEFAULT_FORMAT_OPTIONS: FormatOptions<'static> =
30 FormatOptions::new().with_duration_format(DurationFormat::Pretty);
31
32/// The default [`CastOptions`] to use within DataFusion
33pub const DEFAULT_CAST_OPTIONS: CastOptions<'static> = CastOptions {
34 safe: false,
35 format_options: DEFAULT_FORMAT_OPTIONS,
36};
37
/// Output formats available for rendering `EXPLAIN` plans.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ExplainFormat {
    /// Indented text, one plan node per line.
    ///
    /// Example:
    /// ```text
    /// > explain format indent select x from values (1) t(x);
    /// +---------------+-----------------------------------------------------+
    /// | plan_type     | plan                                                |
    /// +---------------+-----------------------------------------------------+
    /// | logical_plan  | SubqueryAlias: t                                    |
    /// |               |   Projection: column1 AS x                          |
    /// |               |     Values: (Int64(1))                              |
    /// | physical_plan | ProjectionExec: expr=[column1@0 as x]               |
    /// |               |   DataSourceExec: partitions=1, partition_sizes=[1] |
    /// |               |                                                     |
    /// +---------------+-----------------------------------------------------+
    /// ```
    Indent,
    /// Tree rendering, one box per plan node.
    ///
    /// Example:
    /// ```text
    /// > explain format tree select x from values (1) t(x);
    /// +---------------+-------------------------------+
    /// | plan_type     | plan                          |
    /// +---------------+-------------------------------+
    /// | physical_plan | ┌───────────────────────────┐ |
    /// |               | │       ProjectionExec      │ |
    /// |               | │    --------------------   │ |
    /// |               | │        x: column1@0       │ |
    /// |               | └─────────────┬─────────────┘ |
    /// |               | ┌─────────────┴─────────────┐ |
    /// |               | │       DataSourceExec      │ |
    /// |               | │    --------------------   │ |
    /// |               | │         bytes: 128        │ |
    /// |               | │       format: memory      │ |
    /// |               | │          rows: 1          │ |
    /// |               | └───────────────────────────┘ |
    /// |               |                               |
    /// +---------------+-------------------------------+
    /// ```
    Tree,
    /// PostgreSQL-style JSON.
    ///
    /// Produces the plan in the JSON layout used by PostgreSQL.
    ///
    /// This lets users feed the plan into existing plan visualization
    /// tools, for example [dalibo](https://explain.dalibo.com/).
    ///
    /// Example:
    /// ```text
    /// > explain format pgjson select x from values (1) t(x);
    /// +--------------+--------------------------------------+
    /// | plan_type    | plan                                 |
    /// +--------------+--------------------------------------+
    /// | logical_plan | [                                    |
    /// |              |   {                                  |
    /// |              |     "Plan": {                        |
    /// |              |       "Alias": "t",                  |
    /// |              |       "Node Type": "Subquery",       |
    /// |              |       "Output": [                    |
    /// |              |         "x"                          |
    /// |              |       ],                             |
    /// |              |       "Plans": [                     |
    /// |              |         {                            |
    /// |              |           "Expressions": [           |
    /// |              |             "column1 AS x"           |
    /// |              |           ],                         |
    /// |              |           "Node Type": "Projection", |
    /// |              |           "Output": [                |
    /// |              |             "x"                      |
    /// |              |           ],                         |
    /// |              |           "Plans": [                 |
    /// |              |             {                        |
    /// |              |               "Node Type": "Values", |
    /// |              |               "Output": [            |
    /// |              |                 "column1"            |
    /// |              |               ],                     |
    /// |              |               "Plans": [],           |
    /// |              |               "Values": "(Int64(1))" |
    /// |              |             }                        |
    /// |              |           ]                          |
    /// |              |         }                            |
    /// |              |       ]                              |
    /// |              |     }                                |
    /// |              |   }                                  |
    /// |              | ]                                    |
    /// +--------------+--------------------------------------+
    /// ```
    PostgresJSON,
    /// Graphviz (DOT) output.
    ///
    /// Example:
    /// ```text
    /// > explain format graphviz select x from values (1) t(x);
    /// +--------------+------------------------------------------------------------------------+
    /// | plan_type    | plan                                                                   |
    /// +--------------+------------------------------------------------------------------------+
    /// | logical_plan |                                                                        |
    /// |              | // Begin DataFusion GraphViz Plan,                                     |
    /// |              | // display it online here: https://dreampuf.github.io/GraphvizOnline   |
    /// |              |                                                                        |
    /// |              | digraph {                                                              |
    /// |              |   subgraph cluster_1                                                   |
    /// |              |   {                                                                    |
    /// |              |     graph[label="LogicalPlan"]                                         |
    /// |              |     2[shape=box label="SubqueryAlias: t"]                              |
    /// |              |     3[shape=box label="Projection: column1 AS x"]                      |
    /// |              |     2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]                |
    /// |              |     4[shape=box label="Values: (Int64(1))"]                            |
    /// |              |     3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]                |
    /// |              |   }                                                                    |
    /// |              |   subgraph cluster_5                                                   |
    /// |              |   {                                                                    |
    /// |              |     graph[label="Detailed LogicalPlan"]                                |
    /// |              |     6[shape=box label="SubqueryAlias: t\nSchema: [x:Int64;N]"]         |
    /// |              |     7[shape=box label="Projection: column1 AS x\nSchema: [x:Int64;N]"] |
    /// |              |     6 -> 7 [arrowhead=none, arrowtail=normal, dir=back]                |
    /// |              |     8[shape=box label="Values: (Int64(1))\nSchema: [column1:Int64;N]"] |
    /// |              |     7 -> 8 [arrowhead=none, arrowtail=normal, dir=back]                |
    /// |              |   }                                                                    |
    /// |              | }                                                                      |
    /// |              | // End DataFusion GraphViz Plan                                        |
    /// |              |                                                                        |
    /// +--------------+------------------------------------------------------------------------+
    /// ```
    Graphviz,
}
168
169/// Implement parsing strings to `ExplainFormat`
170impl FromStr for ExplainFormat {
171 type Err = DataFusionError;
172
173 fn from_str(format: &str) -> Result<Self, Self::Err> {
174 match format.to_lowercase().as_str() {
175 "indent" => Ok(ExplainFormat::Indent),
176 "tree" => Ok(ExplainFormat::Tree),
177 "pgjson" => Ok(ExplainFormat::PostgresJSON),
178 "graphviz" => Ok(ExplainFormat::Graphviz),
179 _ => Err(DataFusionError::Configuration(format!(
180 "Invalid explain format. Expected 'indent', 'tree', 'pgjson' or 'graphviz'. Got '{format}'"
181 ))),
182 }
183 }
184}
185
186impl Display for ExplainFormat {
187 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
188 let s = match self {
189 ExplainFormat::Indent => "indent",
190 ExplainFormat::Tree => "tree",
191 ExplainFormat::PostgresJSON => "pgjson",
192 ExplainFormat::Graphviz => "graphviz",
193 };
194 write!(f, "{s}")
195 }
196}
197
198impl ConfigField for ExplainFormat {
199 fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
200 v.some(key, self, description)
201 }
202
203 fn set(&mut self, _: &str, value: &str) -> Result<()> {
204 *self = ExplainFormat::from_str(value)?;
205 Ok(())
206 }
207}
208
/// Verbosity tier of a metric, which lets the display layer pick how much
/// detail to show.
///
/// The tier that gets displayed is selected with the
/// `datafusion.explain.analyze_level` configuration option:
/// - `"dev"` (the default): every metric is displayed.
/// - `"summary"`: only metrics tagged as `Summary` are displayed.
///
/// This axis is independent of [`MetricCategory`], which filters on *what
/// kind* of value a metric holds (rows / bytes / timing).
///
/// # Difference from `EXPLAIN ANALYZE VERBOSE`
///
/// The `VERBOSE` keyword chooses between per-partition metrics (keyword
/// present) and aggregated metrics (keyword absent). `MetricType`, by
/// contrast, chooses which *levels* of metrics are displayed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MetricType {
    /// Common metrics giving a high-level picture (e.g. which operator is slow).
    Summary,
    /// Detailed operator-level metrics aimed at developers.
    Dev,
}
232
233impl MetricType {
234 /// Returns the set of metric types that should be shown for this level.
235 ///
236 /// `Dev` is a superset of `Summary`: when the user selects
237 /// `analyze_level = 'dev'`, both `Summary` and `Dev` metrics are shown.
238 pub fn included_types(self) -> Vec<MetricType> {
239 match self {
240 MetricType::Summary => vec![MetricType::Summary],
241 MetricType::Dev => vec![MetricType::Summary, MetricType::Dev],
242 }
243 }
244}
245
246impl FromStr for MetricType {
247 type Err = DataFusionError;
248
249 fn from_str(s: &str) -> Result<Self, Self::Err> {
250 match s.trim().to_lowercase().as_str() {
251 "summary" => Ok(Self::Summary),
252 "dev" => Ok(Self::Dev),
253 other => Err(DataFusionError::Configuration(format!(
254 "Invalid explain analyze level. Expected 'summary' or 'dev'. Got '{other}'"
255 ))),
256 }
257 }
258}
259
260impl Display for MetricType {
261 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
262 match self {
263 Self::Summary => write!(f, "summary"),
264 Self::Dev => write!(f, "dev"),
265 }
266 }
267}
268
269impl ConfigField for MetricType {
270 fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
271 v.some(key, self, description)
272 }
273
274 fn set(&mut self, _: &str, value: &str) -> Result<()> {
275 *self = MetricType::from_str(value)?;
276 Ok(())
277 }
278}
279
/// Describes what a metric measures.
///
/// This axis is independent of [`MetricType`] (Summary / Dev), which governs
/// *verbosity*. `MetricCategory` governs *what kind of value* is displayed,
/// so that `EXPLAIN ANALYZE` output can be restricted to the categories that
/// matter in a given context.
///
/// This is particularly handy in tests, because metrics differ in how stable
/// they are across runs:
/// - [`Rows`](Self::Rows) and [`Bytes`](Self::Bytes) depend only on the plan
///   and the data, so they are mostly deterministic across runs (given the
///   same input). Variations can still exist, e.g. because of
///   non-deterministic ordering of evaluation between threads. Running with
///   a single target partition often makes these metrics stable enough to
///   assert on in tests.
/// - [`Timing`](Self::Timing) depends on hardware, system load, scheduling,
///   etc., so it varies from run to run even on the same machine.
///
/// [`MetricCategory`] is especially useful in sqllogictest (`.slt`) files:
/// setting `datafusion.explain.analyze_categories = 'rows'` lets a test
/// assert on row-count metrics without sprinkling `<slt:ignore>` over every
/// timing value.
///
/// Metrics that do not declare a category (the default for custom
/// `Count` / `Gauge` metrics) are treated as
/// [`Uncategorized`](Self::Uncategorized) for filtering purposes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MetricCategory {
    /// Row counts and related dimensionless counters: `output_rows`,
    /// `spilled_rows`, `output_batches`, pruning metrics, ratios, etc.
    ///
    /// Mostly deterministic for a given plan and data set.
    Rows,
    /// Byte measurements: `output_bytes`, `spilled_bytes`,
    /// `current_memory_usage`, `bytes_scanned`, etc.
    ///
    /// Mostly deterministic for a given plan and data set.
    Bytes,
    /// Wall-clock durations and timestamps: `elapsed_compute`,
    /// operator-defined `Time` metrics, `start_timestamp` /
    /// `end_timestamp`, etc.
    ///
    /// **Non-deterministic** — varies across runs even on the same hardware.
    Timing,
    /// Catch-all for metrics that fit none of [`Rows`](Self::Rows),
    /// [`Bytes`](Self::Bytes), or [`Timing`](Self::Timing).
    ///
    /// Custom `Count` / `Gauge` metrics that are not explicitly assigned
    /// a category are treated as `Uncategorized` for filtering purposes.
    ///
    /// Having this variant lets users include or exclude such metrics
    /// explicitly, e.g.:
    /// ```sql
    /// SET datafusion.explain.analyze_categories = 'rows, bytes, uncategorized';
    /// ```
    Uncategorized,
}
335
336impl FromStr for MetricCategory {
337 type Err = DataFusionError;
338
339 fn from_str(s: &str) -> Result<Self, Self::Err> {
340 match s.trim().to_lowercase().as_str() {
341 "rows" => Ok(Self::Rows),
342 "bytes" => Ok(Self::Bytes),
343 "timing" => Ok(Self::Timing),
344 "uncategorized" => Ok(Self::Uncategorized),
345 other => Err(DataFusionError::Configuration(format!(
346 "Invalid metric category '{other}'. \
347 Expected 'rows', 'bytes', 'timing', or 'uncategorized'."
348 ))),
349 }
350 }
351}
352
353impl Display for MetricCategory {
354 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
355 match self {
356 Self::Rows => write!(f, "rows"),
357 Self::Bytes => write!(f, "bytes"),
358 Self::Timing => write!(f, "timing"),
359 Self::Uncategorized => write!(f, "uncategorized"),
360 }
361 }
362}
363
364/// Controls which [`MetricCategory`] values are shown in `EXPLAIN ANALYZE`.
365///
366/// Set via `SET datafusion.explain.analyze_categories = '...'`.
367///
368/// See [`MetricCategory`] for the determinism properties that motivate
369/// this filter.
370#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
371pub enum ExplainAnalyzeCategories {
372 /// Show all metrics regardless of category (the default).
373 #[default]
374 All,
375 /// Show only metrics whose category is in the list.
376 /// Metrics with no declared category are treated as
377 /// [`Uncategorized`](MetricCategory::Uncategorized) for filtering.
378 ///
379 /// An **empty** vec means "plan only" — suppress all metrics.
380 Only(Vec<MetricCategory>),
381}
382
383impl FromStr for ExplainAnalyzeCategories {
384 type Err = DataFusionError;
385
386 fn from_str(s: &str) -> Result<Self, Self::Err> {
387 let s = s.trim().to_lowercase();
388 match s.as_str() {
389 "all" => Ok(Self::All),
390 "none" => Ok(Self::Only(vec![])),
391 other => {
392 let mut cats = Vec::new();
393 for part in other.split(',') {
394 cats.push(part.trim().parse::<MetricCategory>()?);
395 }
396 cats.dedup();
397 Ok(Self::Only(cats))
398 }
399 }
400 }
401}
402
403impl Display for ExplainAnalyzeCategories {
404 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
405 match self {
406 Self::All => write!(f, "all"),
407 Self::Only(cats) if cats.is_empty() => write!(f, "none"),
408 Self::Only(cats) => {
409 let mut first = true;
410 for cat in cats {
411 if !first {
412 write!(f, ",")?;
413 }
414 first = false;
415 write!(f, "{cat}")?;
416 }
417 Ok(())
418 }
419 }
420 }
421}
422
423impl ConfigField for ExplainAnalyzeCategories {
424 fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
425 v.some(key, self, description)
426 }
427
428 fn set(&mut self, _: &str, value: &str) -> Result<()> {
429 *self = ExplainAnalyzeCategories::from_str(value)?;
430 Ok(())
431 }
432}