datafusion_common/
error.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! DataFusion error types
19#[cfg(feature = "backtrace")]
20use std::backtrace::{Backtrace, BacktraceStatus};
21
22use std::borrow::Cow;
23use std::collections::VecDeque;
24use std::error::Error;
25use std::fmt::{Display, Formatter};
26use std::io;
27use std::result;
28use std::sync::Arc;
29
30use crate::utils::datafusion_strsim::normalized_levenshtein;
31use crate::utils::quote_identifier;
32use crate::{Column, DFSchema, Diagnostic, TableReference};
33#[cfg(feature = "avro")]
34use apache_avro::Error as AvroError;
35use arrow::error::ArrowError;
36#[cfg(feature = "parquet")]
37use parquet::errors::ParquetError;
38use sqlparser::parser::ParserError;
39use tokio::task::JoinError;
40
/// Result type for operations that could result in a [`DataFusionError`].
///
/// The error type defaults to [`DataFusionError`] and can be overridden
/// through the second type parameter.
pub type Result<T, E = DataFusionError> = result::Result<T, E>;

/// Result type for operations that could result in a [`DataFusionError`] that
/// needs to be shared (wrapped into an `Arc`).
pub type SharedResult<T> = result::Result<T, Arc<DataFusionError>>;

/// Boxed `Send + Sync` error trait object; this is the payload type of
/// [`DataFusionError::External`].
pub type GenericError = Box<dyn Error + Send + Sync>;
49
/// DataFusion error
///
/// When displayed, most variants are rendered as a variant-specific prefix
/// (for example `"Execution error: "`) followed by the error message; the
/// wrapper variants `Context`, `Diagnostic` and `Shared` add no prefix of
/// their own.
#[derive(Debug)]
pub enum DataFusionError {
    /// Error returned by arrow.
    ///
    /// 2nd argument is for optional backtrace
    ArrowError(Box<ArrowError>, Option<String>),
    /// Error when reading / writing Parquet data.
    #[cfg(feature = "parquet")]
    ParquetError(Box<ParquetError>),
    /// Error when reading Avro data.
    #[cfg(feature = "avro")]
    AvroError(Box<AvroError>),
    /// Error when reading / writing to / from an object_store (e.g. S3 or LocalFile)
    #[cfg(feature = "object_store")]
    ObjectStore(Box<object_store::Error>),
    /// Error when an I/O operation fails
    IoError(io::Error),
    /// Error when SQL is syntactically incorrect.
    ///
    /// 2nd argument is for optional backtrace
    SQL(Box<ParserError>, Option<String>),
    /// Error when a feature is not yet implemented.
    ///
    /// These errors are sometimes returned for features that are still in
    /// development and are not entirely complete. Often, these errors are
    /// tracked in our issue tracker.
    NotImplemented(String),
    /// Error due to bugs in DataFusion
    ///
    /// This error should not happen in normal usage of DataFusion. It results
    /// from something that wasn't expected/anticipated by the implementation
    /// and that is most likely a bug (the error message even encourages users
    /// to open a bug report). A user should not be able to trigger internal
    /// errors under normal circumstances by feeding in malformed queries, bad
    /// data, etc.
    ///
    /// Note that I/O errors (or any error that happens due to external systems)
    /// do NOT fall under this category. See other variants such as
    /// [`Self::IoError`] and [`Self::External`].
    ///
    /// DataFusion has internal invariants that the compiler is not always able
    /// to check. This error is raised when one of those invariants does not
    /// hold for some reason.
    Internal(String),
    /// Error during planning of the query.
    ///
    /// This error happens when the user provides a bad query or plan, for
    /// example the user attempts to call a function that doesn't exist, or if
    /// the types of a function call are not supported.
    Plan(String),
    /// Error for invalid or unsupported configuration options.
    Configuration(String),
    /// Error when there is a problem with the query related to schema.
    ///
    /// This error can be returned in cases such as when schema inference is not
    /// possible and when column names are not unique.
    ///
    /// 2nd argument is for optional backtrace
    /// Boxing the optional backtrace to prevent <https://rust-lang.github.io/rust-clippy/master/index.html#/result_large_err>
    SchemaError(Box<SchemaError>, Box<Option<String>>),
    /// Error during execution of the query.
    ///
    /// This error is returned when an error happens during execution due to a
    /// malformed input. For example, the user passed malformed arguments to a
    /// SQL method, opened a CSV file that is broken, or tried to divide an
    /// integer by zero.
    Execution(String),
    /// [`JoinError`] during execution of the query.
    ///
    /// This error can't occur for unjoined tasks, such as execution shutdown.
    ExecutionJoin(Box<JoinError>),
    /// Error when resources (such as memory or scratch disk space) are exhausted.
    ///
    /// This error is thrown when a consumer cannot acquire additional memory
    /// or other resources needed to execute the query from the Memory Manager.
    ResourcesExhausted(String),
    /// Errors originating from outside DataFusion's core codebase.
    ///
    /// For example, a custom S3Error from the crate datafusion-objectstore-s3
    External(GenericError),
    /// Error with additional context
    ///
    /// 1st argument is the context description, 2nd argument is the wrapped
    /// error.
    Context(String, Box<DataFusionError>),
    /// Errors from either mapping LogicalPlans to/from Substrait plans
    /// or serializing/deserializing protobytes to Substrait plans
    Substrait(String),
    /// Error wrapped together with additional contextual information intended
    /// for end users, to help them understand what went wrong by providing
    /// human-readable messages, and locations in the source query that relate
    /// to the error in some way.
    Diagnostic(Box<Diagnostic>, Box<DataFusionError>),
    /// A collection of one or more [`DataFusionError`]. Useful in cases where
    /// DataFusion can recover from an erroneous state, and produce more errors
    /// before terminating. e.g. when planning a SELECT clause, DataFusion can
    /// synchronize to the next `SelectItem` if the previous one had errors. The
    /// end result is that the user can see errors about all `SelectItem`,
    /// instead of just the first one.
    ///
    /// The collection must not be empty: displaying an empty collection
    /// panics.
    Collection(Vec<DataFusionError>),
    /// A [`DataFusionError`] which shares an underlying [`DataFusionError`].
    ///
    /// This is useful when the same underlying [`DataFusionError`] is passed
    /// to multiple receivers. For example, when the source of a repartition
    /// errors and the error is propagated to multiple consumers.
    Shared(Arc<DataFusionError>),
}
155
/// Wraps `$err` with a context message made of `$desc` followed by the source
/// location (`file!()` and `line!()`) of the macro invocation.
///
/// Expands to a call to `$err.context(..)` with the formatted `String`, so
/// `$err` must provide such a method (for example
/// [`DataFusionError::context`]).
#[macro_export]
macro_rules! context {
    ($desc:expr, $err:expr) => {
        $err.context(format!("{} at {}:{}", $desc, file!(), line!()))
    };
}
162
/// Schema-related errors
#[derive(Debug)]
pub enum SchemaError {
    /// Schema contains a (possibly) qualified and unqualified field with same unqualified name
    AmbiguousReference {
        /// The ambiguous column reference
        field: Box<Column>,
    },
    /// Schema contains duplicate qualified field name
    DuplicateQualifiedField {
        /// Qualifier of the duplicated field
        qualifier: Box<TableReference>,
        /// Unqualified name of the duplicated field
        name: String,
    },
    /// Schema contains duplicate unqualified field name
    DuplicateUnqualifiedField {
        /// Name of the duplicated field
        name: String,
    },
    /// No field with this name
    FieldNotFound {
        /// The column that could not be found
        field: Box<Column>,
        /// Columns used to build the "Did you mean" / "Valid fields are"
        /// hints of the error message
        valid_fields: Vec<Column>,
    },
}
181
182impl Display for SchemaError {
183    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
184        match self {
185            Self::FieldNotFound {
186                field,
187                valid_fields,
188            } => {
189                write!(f, "No field named {}", field.quoted_flat_name())?;
190                let lower_valid_fields = valid_fields
191                    .iter()
192                    .map(|column| column.flat_name().to_lowercase())
193                    .collect::<Vec<String>>();
194
195                let valid_fields_names = valid_fields
196                    .iter()
197                    .map(|column| column.flat_name())
198                    .collect::<Vec<String>>();
199                if lower_valid_fields.contains(&field.flat_name().to_lowercase()) {
200                    write!(
201                        f,
202                        ". Column names are case sensitive. You can use double quotes to refer to the \"{}\" column \
203                        or set the datafusion.sql_parser.enable_ident_normalization configuration",
204                        field.quoted_flat_name()
205                    )?;
206                }
207                let field_name = field.name();
208                if let Some(matched) = valid_fields_names
209                    .iter()
210                    .filter(|str| normalized_levenshtein(str, field_name) >= 0.5)
211                    .collect::<Vec<&String>>()
212                    .first()
213                {
214                    write!(f, ". Did you mean '{matched}'?")?;
215                } else if !valid_fields.is_empty() {
216                    write!(
217                        f,
218                        ". Valid fields are {}",
219                        valid_fields
220                            .iter()
221                            .map(|field| field.quoted_flat_name())
222                            .collect::<Vec<String>>()
223                            .join(", ")
224                    )?;
225                }
226                write!(f, ".")
227            }
228            Self::DuplicateQualifiedField { qualifier, name } => {
229                write!(
230                    f,
231                    "Schema contains duplicate qualified field name {}.{}",
232                    qualifier.to_quoted_string(),
233                    quote_identifier(name)
234                )
235            }
236            Self::DuplicateUnqualifiedField { name } => {
237                write!(
238                    f,
239                    "Schema contains duplicate unqualified field name {}",
240                    quote_identifier(name)
241                )
242            }
243            Self::AmbiguousReference { field } => {
244                if field.relation.is_some() {
245                    write!(
246                        f,
247                        "Schema contains qualified field name {} and unqualified field name {} which would be ambiguous",
248                        field.quoted_flat_name(),
249                        quote_identifier(&field.name)
250                    )
251                } else {
252                    write!(
253                        f,
254                        "Ambiguous reference to unqualified field {}",
255                        field.quoted_flat_name()
256                    )
257                }
258            }
259        }
260    }
261}
262
// No `SchemaError` variant wraps another error, so the default `Error`
// methods are used unchanged.
impl Error for SchemaError {}
264
265impl From<std::fmt::Error> for DataFusionError {
266    fn from(_e: std::fmt::Error) -> Self {
267        DataFusionError::Execution("Fail to format".to_string())
268    }
269}
270
271impl From<io::Error> for DataFusionError {
272    fn from(e: io::Error) -> Self {
273        DataFusionError::IoError(e)
274    }
275}
276
277impl From<ArrowError> for DataFusionError {
278    fn from(e: ArrowError) -> Self {
279        DataFusionError::ArrowError(Box::new(e), None)
280    }
281}
282
283impl From<DataFusionError> for ArrowError {
284    fn from(e: DataFusionError) -> Self {
285        match e {
286            DataFusionError::ArrowError(e, _) => *e,
287            DataFusionError::External(e) => ArrowError::ExternalError(e),
288            other => ArrowError::ExternalError(Box::new(other)),
289        }
290    }
291}
292
293impl From<&Arc<DataFusionError>> for DataFusionError {
294    fn from(e: &Arc<DataFusionError>) -> Self {
295        if let DataFusionError::Shared(e_inner) = e.as_ref() {
296            // don't re-wrap
297            DataFusionError::Shared(Arc::clone(e_inner))
298        } else {
299            DataFusionError::Shared(Arc::clone(e))
300        }
301    }
302}
303
#[cfg(feature = "parquet")]
impl From<ParquetError> for DataFusionError {
    /// Boxes a parquet error into [`DataFusionError::ParquetError`].
    fn from(e: ParquetError) -> Self {
        Self::ParquetError(Box::new(e))
    }
}
310
#[cfg(feature = "avro")]
impl From<AvroError> for DataFusionError {
    /// Boxes an avro error into [`DataFusionError::AvroError`].
    fn from(e: AvroError) -> Self {
        Self::AvroError(Box::new(e))
    }
}
317
#[cfg(feature = "object_store")]
impl From<object_store::Error> for DataFusionError {
    /// Boxes an object store error into [`DataFusionError::ObjectStore`].
    fn from(e: object_store::Error) -> Self {
        Self::ObjectStore(Box::new(e))
    }
}
324
#[cfg(feature = "object_store")]
impl From<object_store::path::Error> for DataFusionError {
    /// Converts an object store path error into an `object_store::Error` and
    /// boxes it into [`DataFusionError::ObjectStore`].
    fn from(e: object_store::path::Error) -> Self {
        let store_error = object_store::Error::from(e);
        Self::ObjectStore(Box::new(store_error))
    }
}
331
332impl From<ParserError> for DataFusionError {
333    fn from(e: ParserError) -> Self {
334        DataFusionError::SQL(Box::new(e), None)
335    }
336}
337
338impl From<GenericError> for DataFusionError {
339    fn from(err: GenericError) -> Self {
340        // If the error is already a DataFusionError, not wrapping it.
341        if err.is::<DataFusionError>() {
342            if let Ok(e) = err.downcast::<DataFusionError>() {
343                *e
344            } else {
345                unreachable!()
346            }
347        } else {
348            DataFusionError::External(err)
349        }
350    }
351}
352
353impl Display for DataFusionError {
354    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
355        let error_prefix = self.error_prefix();
356        let message = self.message();
357        write!(f, "{error_prefix}{message}")
358    }
359}
360
361impl Error for DataFusionError {
362    fn source(&self) -> Option<&(dyn Error + 'static)> {
363        match self {
364            DataFusionError::ArrowError(e, _) => Some(e.as_ref()),
365            #[cfg(feature = "parquet")]
366            DataFusionError::ParquetError(e) => Some(e.as_ref()),
367            #[cfg(feature = "avro")]
368            DataFusionError::AvroError(e) => Some(e.as_ref()),
369            #[cfg(feature = "object_store")]
370            DataFusionError::ObjectStore(e) => Some(e.as_ref()),
371            DataFusionError::IoError(e) => Some(e),
372            DataFusionError::SQL(e, _) => Some(e.as_ref()),
373            DataFusionError::NotImplemented(_) => None,
374            DataFusionError::Internal(_) => None,
375            DataFusionError::Configuration(_) => None,
376            DataFusionError::Plan(_) => None,
377            DataFusionError::SchemaError(e, _) => Some(e.as_ref()),
378            DataFusionError::Execution(_) => None,
379            DataFusionError::ExecutionJoin(e) => Some(e.as_ref()),
380            DataFusionError::ResourcesExhausted(_) => None,
381            DataFusionError::External(e) => Some(e.as_ref()),
382            DataFusionError::Context(_, e) => Some(e.as_ref()),
383            DataFusionError::Substrait(_) => None,
384            DataFusionError::Diagnostic(_, e) => Some(e.as_ref()),
385            // Can't really make a Collection fit into the mold of "an error has
386            // at most one source", but returning the first one is probably good
387            // idea. Especially since `DataFusionError::Collection` is mostly
388            // meant for consumption by the end user, so shouldn't interfere
389            // with programmatic usage too much. Plus, having 1 or 5 errors
390            // doesn't really change the fact that the query is invalid and
391            // can't be executed.
392            DataFusionError::Collection(errs) => errs.first().map(|e| e as &dyn Error),
393            DataFusionError::Shared(e) => Some(e.as_ref()),
394        }
395    }
396}
397
398impl From<DataFusionError> for io::Error {
399    fn from(e: DataFusionError) -> Self {
400        io::Error::other(e)
401    }
402}
403
404impl DataFusionError {
    /// The separator between the error message and the backtrace
    ///
    /// [`Self::get_back_trace`] puts this separator in front of a captured
    /// backtrace and [`Self::strip_backtrace`] splits the rendered error at it.
    pub const BACK_TRACE_SEP: &'static str = "\n\nbacktrace: ";
407
408    /// Get deepest underlying [`DataFusionError`]
409    ///
410    /// [`DataFusionError`]s sometimes form a chain, such as `DataFusionError::ArrowError()` in order to conform
411    /// to the correct error signature. Thus sometimes there is a chain several layers deep that can obscure the
412    /// original error. This function finds the lowest level DataFusionError possible.
413    ///
414    /// For example,  `find_root` will return`DataFusionError::ResourceExhausted` given the input
415    /// ```text
416    /// DataFusionError::ArrowError
417    ///   ArrowError::External
418    ///    Box(DataFusionError::Context)
419    ///      DataFusionError::ResourceExhausted
420    /// ```
421    ///
422    /// This may be the same as `self`.
423    pub fn find_root(&self) -> &Self {
424        // Note: This is a non-recursive algorithm so we do not run
425        // out of stack space, even for long error chains.
426
427        let mut last_datafusion_error = self;
428        let mut root_error: &dyn Error = self;
429        while let Some(source) = root_error.source() {
430            // walk the next level
431            root_error = source;
432            // remember the lowest datafusion error so far
433            if let Some(e) = root_error.downcast_ref::<DataFusionError>() {
434                last_datafusion_error = e;
435            } else if let Some(e) = root_error.downcast_ref::<Arc<DataFusionError>>() {
436                // As `Arc<T>::source()` calls through to `T::source()` we need to
437                // explicitly match `Arc<DataFusionError>` to capture it
438                last_datafusion_error = e.as_ref();
439            }
440        }
441        // return last checkpoint (which may be the original error)
442        last_datafusion_error
443    }
444
445    /// wraps self in Self::Context with a description
446    pub fn context(self, description: impl Into<String>) -> Self {
447        Self::Context(description.into(), Box::new(self))
448    }
449
450    /// Strips backtrace out of the error message
451    /// If backtrace enabled then error has a format "message" [`Self::BACK_TRACE_SEP`] "backtrace"
452    /// The method strips the backtrace and outputs "message"
453    pub fn strip_backtrace(&self) -> String {
454        self.to_string()
455            .split(Self::BACK_TRACE_SEP)
456            .collect::<Vec<&str>>()
457            .first()
458            .unwrap_or(&"")
459            .to_string()
460    }
461
462    /// To enable optional rust backtrace in DataFusion:
463    /// - [`Setup Env Variables`]<https://doc.rust-lang.org/std/backtrace/index.html#environment-variables>
464    /// - Enable `backtrace` cargo feature
465    ///
466    /// Example:
467    /// cargo build --features 'backtrace'
468    /// RUST_BACKTRACE=1 ./app
469    #[inline(always)]
470    pub fn get_back_trace() -> String {
471        #[cfg(feature = "backtrace")]
472        {
473            let back_trace = Backtrace::capture();
474            if back_trace.status() == BacktraceStatus::Captured {
475                return format!("{}{}", Self::BACK_TRACE_SEP, back_trace);
476            }
477
478            "".to_owned()
479        }
480
481        #[cfg(not(feature = "backtrace"))]
482        "".to_owned()
483    }
484
485    /// Return a [`DataFusionErrorBuilder`] to build a [`DataFusionError`]
486    pub fn builder() -> DataFusionErrorBuilder {
487        DataFusionErrorBuilder::default()
488    }
489
490    fn error_prefix(&self) -> &'static str {
491        match self {
492            DataFusionError::ArrowError(_, _) => "Arrow error: ",
493            #[cfg(feature = "parquet")]
494            DataFusionError::ParquetError(_) => "Parquet error: ",
495            #[cfg(feature = "avro")]
496            DataFusionError::AvroError(_) => "Avro error: ",
497            #[cfg(feature = "object_store")]
498            DataFusionError::ObjectStore(_) => "Object Store error: ",
499            DataFusionError::IoError(_) => "IO error: ",
500            DataFusionError::SQL(_, _) => "SQL error: ",
501            DataFusionError::NotImplemented(_) => {
502                "This feature is not implemented: "
503            }
504            DataFusionError::Internal(_) => "Internal error: ",
505            DataFusionError::Plan(_) => "Error during planning: ",
506            DataFusionError::Configuration(_) => {
507                "Invalid or Unsupported Configuration: "
508            }
509            DataFusionError::SchemaError(_, _) => "Schema error: ",
510            DataFusionError::Execution(_) => "Execution error: ",
511            DataFusionError::ExecutionJoin(_) => "ExecutionJoin error: ",
512            DataFusionError::ResourcesExhausted(_) => {
513                "Resources exhausted: "
514            }
515            DataFusionError::External(_) => "External error: ",
516            DataFusionError::Context(_, _) => "",
517            DataFusionError::Substrait(_) => "Substrait error: ",
518            DataFusionError::Diagnostic(_, _) => "",
519            DataFusionError::Collection(errs) => {
520                errs.first().expect("cannot construct DataFusionError::Collection with 0 errors, but got one such case").error_prefix()
521            }
522            DataFusionError::Shared(_) => "",
523        }
524    }
525
526    pub fn message(&self) -> Cow<str> {
527        match *self {
528            DataFusionError::ArrowError(ref desc, ref backtrace) => {
529                let backtrace = backtrace.clone().unwrap_or_else(|| "".to_owned());
530                Cow::Owned(format!("{desc}{backtrace}"))
531            }
532            #[cfg(feature = "parquet")]
533            DataFusionError::ParquetError(ref desc) => Cow::Owned(desc.to_string()),
534            #[cfg(feature = "avro")]
535            DataFusionError::AvroError(ref desc) => Cow::Owned(desc.to_string()),
536            DataFusionError::IoError(ref desc) => Cow::Owned(desc.to_string()),
537            DataFusionError::SQL(ref desc, ref backtrace) => {
538                let backtrace: String =
539                    backtrace.clone().unwrap_or_else(|| "".to_owned());
540                Cow::Owned(format!("{desc:?}{backtrace}"))
541            }
542            DataFusionError::Configuration(ref desc) => Cow::Owned(desc.to_string()),
543            DataFusionError::NotImplemented(ref desc) => Cow::Owned(desc.to_string()),
544            DataFusionError::Internal(ref desc) => Cow::Owned(format!(
545                "{desc}.\nThis issue was likely caused by a bug in DataFusion's code. \
546                Please help us to resolve this by filing a bug report in our issue tracker: \
547                https://github.com/apache/datafusion/issues"
548            )),
549            DataFusionError::Plan(ref desc) => Cow::Owned(desc.to_string()),
550            DataFusionError::SchemaError(ref desc, ref backtrace) => {
551                let backtrace: &str =
552                    &backtrace.as_ref().clone().unwrap_or_else(|| "".to_owned());
553                Cow::Owned(format!("{desc}{backtrace}"))
554            }
555            DataFusionError::Execution(ref desc) => Cow::Owned(desc.to_string()),
556            DataFusionError::ExecutionJoin(ref desc) => Cow::Owned(desc.to_string()),
557            DataFusionError::ResourcesExhausted(ref desc) => Cow::Owned(desc.to_string()),
558            DataFusionError::External(ref desc) => Cow::Owned(desc.to_string()),
559            #[cfg(feature = "object_store")]
560            DataFusionError::ObjectStore(ref desc) => Cow::Owned(desc.to_string()),
561            DataFusionError::Context(ref desc, ref err) => {
562                Cow::Owned(format!("{desc}\ncaused by\n{}", *err))
563            }
564            DataFusionError::Substrait(ref desc) => Cow::Owned(desc.to_string()),
565            DataFusionError::Diagnostic(_, ref err) => Cow::Owned(err.to_string()),
566            // Returning the message of the first error is probably fine enough,
567            // and makes `DataFusionError::Collection` a transparent wrapped,
568            // unless the end user explicitly calls `DataFusionError::iter`.
569            DataFusionError::Collection(ref errs) => errs
570                .first()
571                .expect("cannot construct DataFusionError::Collection with 0 errors")
572                .message(),
573            DataFusionError::Shared(ref desc) => Cow::Owned(desc.to_string()),
574        }
575    }
576
577    /// Wraps the error with contextual information intended for end users
578    pub fn with_diagnostic(self, diagnostic: Diagnostic) -> Self {
579        Self::Diagnostic(Box::new(diagnostic), Box::new(self))
580    }
581
582    /// Wraps the error with contextual information intended for end users.
583    /// Takes a function that inspects the error and returns the diagnostic to
584    /// wrap it with.
585    pub fn with_diagnostic_fn<F: FnOnce(&DataFusionError) -> Diagnostic>(
586        self,
587        f: F,
588    ) -> Self {
589        let diagnostic = f(&self);
590        self.with_diagnostic(diagnostic)
591    }
592
593    /// Gets the [`Diagnostic`] associated with the error, if any. If there is
594    /// more than one, only the outermost [`Diagnostic`] is returned.
595    pub fn diagnostic(&self) -> Option<&Diagnostic> {
596        struct DiagnosticsIterator<'a> {
597            head: &'a DataFusionError,
598        }
599
600        impl<'a> Iterator for DiagnosticsIterator<'a> {
601            type Item = &'a Diagnostic;
602
603            fn next(&mut self) -> Option<Self::Item> {
604                loop {
605                    if let DataFusionError::Diagnostic(diagnostics, source) = self.head {
606                        self.head = source.as_ref();
607                        return Some(diagnostics);
608                    }
609
610                    if let Some(source) = self
611                        .head
612                        .source()
613                        .and_then(|source| source.downcast_ref::<DataFusionError>())
614                    {
615                        self.head = source;
616                    } else {
617                        return None;
618                    }
619                }
620            }
621        }
622
623        DiagnosticsIterator { head: self }.next()
624    }
625
626    /// Return an iterator over this [`DataFusionError`] and any other
627    /// [`DataFusionError`]s in a [`DataFusionError::Collection`].
628    ///
629    /// Sometimes DataFusion is able to collect multiple errors in a SQL query
630    /// before terminating, e.g. across different expressions in a SELECT
631    /// statements or different sides of a UNION. This method returns an
632    /// iterator over all the errors in the collection.
633    ///
634    /// For this to work, the top-level error must be a
635    /// `DataFusionError::Collection`, not something that contains it.
636    pub fn iter(&self) -> impl Iterator<Item = &DataFusionError> {
637        struct ErrorIterator<'a> {
638            queue: VecDeque<&'a DataFusionError>,
639        }
640
641        impl<'a> Iterator for ErrorIterator<'a> {
642            type Item = &'a DataFusionError;
643
644            fn next(&mut self) -> Option<Self::Item> {
645                loop {
646                    let popped = self.queue.pop_front()?;
647                    match popped {
648                        DataFusionError::Collection(errs) => self.queue.extend(errs),
649                        _ => return Some(popped),
650                    }
651                }
652            }
653        }
654
655        let mut queue = VecDeque::new();
656        queue.push_back(self);
657        ErrorIterator { queue }
658    }
659}
660
/// A builder for [`DataFusionError`]
///
/// This builder can be used to collect multiple errors and return them as a
/// [`DataFusionError::Collection`].
///
/// # Example: no errors
/// ```
/// # use datafusion_common::DataFusionError;
/// let mut builder = DataFusionError::builder();
/// // error_or returns the value if no errors have been added
/// assert_eq!(builder.error_or(42).unwrap(), 42);
/// ```
///
/// # Example: with errors
/// ```
/// # use datafusion_common::{assert_contains, DataFusionError};
/// let mut builder = DataFusionError::builder();
/// builder.add_error(DataFusionError::Internal("foo".to_owned()));
/// // error_or returns the error, since one has been added
/// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo");
/// ```
#[derive(Debug, Default)]
pub struct DataFusionErrorBuilder(Vec<DataFusionError>);
684
685impl DataFusionErrorBuilder {
686    /// Create a new [`DataFusionErrorBuilder`]
687    pub fn new() -> Self {
688        Default::default()
689    }
690
691    /// Add an error to the in progress list
692    ///
693    /// # Example
694    /// ```
695    /// # use datafusion_common::{assert_contains, DataFusionError};
696    /// let mut builder = DataFusionError::builder();
697    /// builder.add_error(DataFusionError::Internal("foo".to_owned()));
698    /// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo");
699    /// ```
700    pub fn add_error(&mut self, error: DataFusionError) {
701        self.0.push(error);
702    }
703
704    /// Add an error to the in progress list, returning the builder
705    ///
706    /// # Example
707    /// ```
708    /// # use datafusion_common::{assert_contains, DataFusionError};
709    /// let builder = DataFusionError::builder()
710    ///   .with_error(DataFusionError::Internal("foo".to_owned()));
711    /// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo");
712    /// ```
713    pub fn with_error(mut self, error: DataFusionError) -> Self {
714        self.0.push(error);
715        self
716    }
717
718    /// Returns `Ok(ok)` if no errors were added to the builder,
719    /// otherwise returns a `Result::Err`
720    pub fn error_or<T>(self, ok: T) -> Result<T, DataFusionError> {
721        match self.0.len() {
722            0 => Ok(ok),
723            1 => Err(self.0.into_iter().next().expect("length matched 1")),
724            _ => Err(DataFusionError::Collection(self.0)),
725        }
726    }
727}
728
/// Unwrap an `Option` if possible. Otherwise return a `DataFusionError::Internal`.
/// In normal usage of DataFusion the unwrap should always succeed.
///
/// The macro takes a single identifier and expands to an expression ending in
/// `?`, so it can only be used inside a function whose error type can be
/// created from a `DataFusionError`. The error message names the identifier
/// that was `None`.
///
/// Example: `let values = unwrap_or_internal_err!(values)`
#[macro_export]
macro_rules! unwrap_or_internal_err {
    ($Value: ident) => {
        $Value.ok_or_else(|| {
            // Fully qualified paths: callers do not need `DataFusionError`
            // (or an unshadowed `format!`) in scope.
            $crate::DataFusionError::Internal(::std::format!(
                "{} should not be None",
                stringify!($Value)
            ))
        })?
    };
}
744
/// Adds macros for the concise declaration of `DataFusionError::*` errors.
/// The macros support placeholders the same way as `format!`.
747/// Examples:
748///     plan_err!("Error")
749///     plan_err!("Error {}", val)
750///     plan_err!("Error {:?}", val)
751///     plan_err!("Error {val}")
752///     plan_err!("Error {val:?}")
753///
754/// `NAME_ERR` -  macro name for wrapping Err(DataFusionError::*)
755/// `NAME_DF_ERR` -  macro name for wrapping DataFusionError::*. Needed to keep backtrace opportunity
756/// in construction where DataFusionError::* used directly, like `map_err`, `ok_or_else`, etc
757macro_rules! make_error {
758    ($NAME_ERR:ident, $NAME_DF_ERR: ident, $ERR:ident) => { make_error!(@inner ($), $NAME_ERR, $NAME_DF_ERR, $ERR); };
759    (@inner ($d:tt), $NAME_ERR:ident, $NAME_DF_ERR:ident, $ERR:ident) => {
760        ::paste::paste!{
761            /// Macro wraps `$ERR` to add backtrace feature
762            #[macro_export]
763            macro_rules! $NAME_DF_ERR {
764                ($d($d args:expr),* $d(; diagnostic=$d DIAG:expr)?) => {{
765                    let err =$crate::DataFusionError::$ERR(
766                        ::std::format!(
767                            "{}{}",
768                            ::std::format!($d($d args),*),
769                            $crate::DataFusionError::get_back_trace(),
770                        ).into()
771                    );
772                    $d (
773                        let err = err.with_diagnostic($d DIAG);
774                    )?
775                    err
776                }
777            }
778        }
779
780            /// Macro wraps Err(`$ERR`) to add backtrace feature
781            #[macro_export]
782            macro_rules! $NAME_ERR {
783                ($d($d args:expr),* $d(; diagnostic = $d DIAG:expr)?) => {{
784                    let err = $crate::[<_ $NAME_DF_ERR>]!($d($d args),*);
785                    $d (
786                        let err = err.with_diagnostic($d DIAG);
787                    )?
788                    Err(err)
789
790                }}
791            }
792
793
794            // Note: Certain macros are used in this  crate, but not all.
795            // This macro generates a use or all of them in case they are needed
796            // so we allow unused code to avoid warnings when they are not used
797            #[doc(hidden)]
798            #[allow(unused)]
799            pub use $NAME_ERR as [<_ $NAME_ERR>];
800            #[doc(hidden)]
801            #[allow(unused)]
802            pub use $NAME_DF_ERR as [<_ $NAME_DF_ERR>];
803        }
804    };
805}
806
// Exposes `plan_err!` and `plan_datafusion_err!` to create `DataFusionError::Plan` with optional backtrace
make_error!(plan_err, plan_datafusion_err, Plan);

// Exposes `internal_err!` and `internal_datafusion_err!` to create `DataFusionError::Internal` with optional backtrace
make_error!(internal_err, internal_datafusion_err, Internal);

// Exposes `not_impl_err!` and `not_impl_datafusion_err!` to create `DataFusionError::NotImplemented` with optional backtrace
make_error!(not_impl_err, not_impl_datafusion_err, NotImplemented);

// Exposes `exec_err!` and `exec_datafusion_err!` to create `DataFusionError::Execution` with optional backtrace
make_error!(exec_err, exec_datafusion_err, Execution);

// Exposes `config_err!` and `config_datafusion_err!` to create `DataFusionError::Configuration` with optional backtrace
make_error!(config_err, config_datafusion_err, Configuration);

// Exposes `substrait_err!` and `substrait_datafusion_err!` to create `DataFusionError::Substrait` with optional backtrace
make_error!(substrait_err, substrait_datafusion_err, Substrait);

// Exposes `resources_err!` and `resources_datafusion_err!` to create `DataFusionError::ResourcesExhausted` with optional backtrace
make_error!(resources_err, resources_datafusion_err, ResourcesExhausted);
827
/// Creates a `DataFusionError::SQL` from the given parser error, with optional backtrace.
///
/// The error expression is boxed and stored together with the output of
/// `DataFusionError::get_back_trace()`. An optional trailing
/// `; diagnostic = <expr>` is attached to the result via `with_diagnostic`.
#[macro_export]
macro_rules! sql_datafusion_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        // `$crate::` keeps the macro usable from call sites that have not
        // imported `DataFusionError` themselves.
        let err = $crate::DataFusionError::SQL(
            Box::new($ERR),
            Some($crate::DataFusionError::get_back_trace()),
        );
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        err
    }};
}
839
/// Creates an `Err(DataFusionError::SQL)` from the given parser error, with optional backtrace.
///
/// The error itself is built by `sql_datafusion_err!`; an optional trailing
/// `; diagnostic = <expr>` is attached via `with_diagnostic` before wrapping
/// the result in `Err`.
#[macro_export]
macro_rules! sql_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        // Resolve the helper through `$crate` instead of the literal crate name so the
        // expansion does not depend on this crate being reachable as `datafusion_common`
        // at the call site (e.g. when the dependency is renamed).
        let err = $crate::sql_datafusion_err!($ERR);
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        Err(err)
    }};
}
851
/// Creates a `DataFusionError::ArrowError` from the given arrow error, with optional backtrace.
///
/// The error expression is boxed and stored together with the output of
/// `DataFusionError::get_back_trace()`. An optional trailing
/// `; diagnostic = <expr>` is attached to the result via `with_diagnostic`.
#[macro_export]
macro_rules! arrow_datafusion_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        // `$crate::` keeps the macro usable from call sites that have not
        // imported `DataFusionError` themselves.
        let err = $crate::DataFusionError::ArrowError(
            Box::new($ERR),
            Some($crate::DataFusionError::get_back_trace()),
        );
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        err
    }};
}
863
/// Creates an `Err(DataFusionError::ArrowError)` from the given arrow error, with optional backtrace.
///
/// The error itself is built by `arrow_datafusion_err!`; an optional trailing
/// `; diagnostic = <expr>` is attached via `with_diagnostic` before wrapping
/// the result in `Err`.
#[macro_export]
macro_rules! arrow_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        // Resolve the helper through `$crate` instead of the literal crate name so the
        // expansion does not depend on this crate being reachable as `datafusion_common`
        // at the call site (e.g. when the dependency is renamed).
        let err = $crate::arrow_datafusion_err!($ERR);
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        Err(err)
    }};
}
876
// Builds a `DataFusionError::SchemaError` (optional backtrace included) from the
// given schema error; a trailing `; diagnostic = <expr>` is attached via `with_diagnostic`
#[macro_export]
macro_rules! schema_datafusion_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        // Both payloads are boxed: first the schema error, then the backtrace slot
        let source = Box::new($ERR);
        let backtrace = Box::new(Some($crate::error::DataFusionError::get_back_trace()));
        let err = $crate::error::DataFusionError::SchemaError(source, backtrace);
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        err
    }};
}
891
/// Creates an `Err(DataFusionError::SchemaError)` from the given schema error, with optional backtrace.
///
/// The error (including the optional `; diagnostic = <expr>`) is built by
/// `schema_datafusion_err!` so both macros share one construction path, and
/// the result is wrapped in `Err`.
#[macro_export]
macro_rules! schema_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {
        Err($crate::schema_datafusion_err!($ERR $(; diagnostic = $DIAG)?))
    };
}
907
908// To avoid compiler error when using macro in the same crate:
909// macros from the current crate cannot be referred to by absolute paths
910pub use schema_err as _schema_err;
911
912/// Create a "field not found" DataFusion::SchemaError
913pub fn field_not_found<R: Into<TableReference>>(
914    qualifier: Option<R>,
915    name: &str,
916    schema: &DFSchema,
917) -> DataFusionError {
918    schema_datafusion_err!(SchemaError::FieldNotFound {
919        field: Box::new(Column::new(qualifier, name)),
920        valid_fields: schema.columns().to_vec(),
921    })
922}
923
924/// Convenience wrapper over [`field_not_found`] for when there is no qualifier
925pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) -> DataFusionError {
926    schema_datafusion_err!(SchemaError::FieldNotFound {
927        field: Box::new(Column::new_unqualified(name)),
928        valid_fields: schema.columns().to_vec(),
929    })
930}
931
932pub fn add_possible_columns_to_diag(
933    diagnostic: &mut Diagnostic,
934    field: &Column,
935    valid_fields: &[Column],
936) {
937    let field_names: Vec<String> = valid_fields
938        .iter()
939        .filter_map(|f| {
940            if normalized_levenshtein(f.name(), field.name()) >= 0.5 {
941                Some(f.flat_name())
942            } else {
943                None
944            }
945        })
946        .collect();
947
948    for name in field_names {
949        diagnostic.add_note(format!("possible column {name}"), None);
950    }
951}
952
#[cfg(test)]
mod test {
    use super::*;

    use std::mem::size_of;
    use std::sync::Arc;

    use arrow::error::ArrowError;

    /// Errors determine the size of every `Result`, which in turn affects
    /// stack usage, so the sizes are pinned here: please don't let them grow.
    #[test]
    fn test_error_size() {
        assert_eq!(size_of::<SchemaError>(), 40);
        assert_eq!(size_of::<DataFusionError>(), 40);
    }

    #[test]
    fn datafusion_error_to_arrow() {
        let message = return_arrow_error().unwrap_err().to_string();
        assert!(message.starts_with("External error: Error during planning: foo"));
    }

    #[test]
    fn arrow_error_to_datafusion() {
        let err = return_datafusion_error().unwrap_err();
        assert_eq!(err.strip_backtrace(), "Arrow error: Schema error: bar");
    }

    // This test only passes when the environment variable RUST_BACKTRACE is set to 1,
    // which forces a backtrace to be captured
    #[cfg(feature = "backtrace")]
    #[test]
    #[allow(clippy::unnecessary_literal_unwrap)]
    fn test_enabled_backtrace() {
        let backtrace_var = std::env::var("RUST_BACKTRACE");
        assert!(
            matches!(backtrace_var.as_deref(), Ok("1")),
            "Environment variable RUST_BACKTRACE must be set to 1"
        );

        let res: Result<(), DataFusionError> = plan_err!("Err");
        let rendered = res.unwrap_err().to_string();
        assert!(rendered.contains(DataFusionError::BACK_TRACE_SEP));

        // First section is the message itself, second is the backtrace text.
        let mut sections = rendered.split(DataFusionError::BACK_TRACE_SEP);
        assert_eq!(sections.next().unwrap(), "Error during planning: Err");
        assert!(!sections.next().unwrap().is_empty());
    }

    #[cfg(not(feature = "backtrace"))]
    #[test]
    #[allow(clippy::unnecessary_literal_unwrap)]
    fn test_disabled_backtrace() {
        let res: Result<(), DataFusionError> = plan_err!("Err");
        let rendered = res.unwrap_err().to_string();
        assert!(!rendered.contains(DataFusionError::BACK_TRACE_SEP));
        assert_eq!(rendered, "Error during planning: Err");
    }

    #[test]
    fn test_find_root_error() {
        // Every case wraps the same root error in a different chain of layers.
        let exhausted = || DataFusionError::ResourcesExhausted("foo".to_string());

        let wrapped_cases = vec![
            // context message around the root
            DataFusionError::Context("it happened!".to_string(), Box::new(exhausted())),
            // arrow external error around the root
            DataFusionError::ArrowError(
                Box::new(ArrowError::ExternalError(Box::new(exhausted()))),
                None,
            ),
            // DataFusion external error around the root
            DataFusionError::External(Box::new(exhausted())),
            // DataFusion external -> arrow external -> root
            DataFusionError::External(Box::new(ArrowError::ExternalError(Box::new(
                exhausted(),
            )))),
            // arrow error -> arrow external -> arrow external -> root
            DataFusionError::ArrowError(
                Box::new(ArrowError::ExternalError(Box::new(
                    ArrowError::ExternalError(Box::new(exhausted())),
                ))),
                None,
            ),
            // DataFusion external -> Arc -> root
            DataFusionError::External(Box::new(Arc::new(exhausted()))),
            // DataFusion external -> Arc -> arrow external -> root
            DataFusionError::External(Box::new(Arc::new(ArrowError::ExternalError(
                Box::new(exhausted()),
            )))),
        ];

        for wrapped in wrapped_cases {
            do_root_test(wrapped, exhausted());
        }
    }

    #[test]
    #[allow(clippy::unnecessary_literal_unwrap)]
    fn test_make_error_parse_input() {
        let extra1 = "extra1";
        let extra2 = "extra2";

        // One entry per supported way of passing format arguments.
        let cases: Vec<(Result<(), DataFusionError>, &str)> = vec![
            (plan_err!("Err"), "Error during planning: Err"),
            (
                plan_err!("Err {} {}", extra1, extra2),
                "Error during planning: Err extra1 extra2",
            ),
            (
                plan_err!("Err {:?} {:#?}", extra1, extra2),
                "Error during planning: Err \"extra1\" \"extra2\"",
            ),
            (
                plan_err!("Err {extra1} {extra2}"),
                "Error during planning: Err extra1 extra2",
            ),
            (
                plan_err!("Err {extra1:?} {extra2:#?}"),
                "Error during planning: Err \"extra1\" \"extra2\"",
            ),
        ];

        for (res, expected) in cases {
            assert_eq!(res.unwrap_err().strip_backtrace(), expected);
        }
    }

    #[test]
    fn external_error() {
        // A boxed DataFusionError must come back out without an extra wrapper
        let boxed_plan: GenericError = Box::new(DataFusionError::Plan("test".to_string()));
        let stripped = DataFusionError::from(boxed_plan).strip_backtrace();
        println!("{}", stripped);
        assert_eq!(stripped, "Error during planning: test");

        // Any other error type is wrapped as an external error
        let boxed_io: GenericError = Box::new(io::Error::other("io error"));
        let stripped = DataFusionError::from(boxed_io).strip_backtrace();
        println!("{}", stripped);
        assert_eq!(stripped, "External error: io error");
    }

    #[test]
    fn external_error_no_recursive() {
        let boxed_io: GenericError = Box::new(io::Error::other("io error"));
        let first_conversion = DataFusionError::from(boxed_io);
        let reboxed: GenericError = Box::new(first_conversion);
        let external_error_2 = DataFusionError::from(reboxed);

        println!("{external_error_2}");
        let rendered = external_error_2.to_string();
        assert!(rendered.starts_with("External error: io error"));
    }

    /// Model what happens when implementing SendableRecordBatchStream:
    /// DataFusion code needs to return an ArrowError
    fn return_arrow_error() -> arrow::error::Result<()> {
        let failure: Result<()> = Err(DataFusionError::Plan("foo".to_string()));
        // Expect the '?' to work
        failure?;
        Ok(())
    }

    /// Model what happens when using arrow kernels in DataFusion
    /// code: need to turn an ArrowError into a DataFusionError
    fn return_datafusion_error() -> Result<()> {
        let failure: arrow::error::Result<()> =
            Err(ArrowError::SchemaError("bar".to_string()));
        // Expect the '?' to work
        failure?;
        Ok(())
    }

    /// Asserts that the root of `e` matches `exp`.
    ///
    /// DataFusionError does not implement Eq, so the comparison is done on the
    /// backtrace-stripped text plus a cheap "same variant" check.
    fn do_root_test(e: DataFusionError, exp: DataFusionError) {
        let root = e.find_root();

        assert_eq!(root.strip_backtrace(), exp.strip_backtrace());
        assert_eq!(std::mem::discriminant(root), std::mem::discriminant(&exp));
    }

    #[test]
    fn test_iter() {
        let plan = |message: &str| DataFusionError::Plan(message.to_string());

        // A nested collection should be flattened by `iter`.
        let err = DataFusionError::Collection(vec![
            plan("a"),
            DataFusionError::Collection(vec![plan("b"), plan("c")]),
        ]);

        let flattened: Vec<String> = err.iter().map(|e| e.strip_backtrace()).collect();
        assert_eq!(
            flattened,
            [
                "Error during planning: a",
                "Error during planning: b",
                "Error during planning: c",
            ]
        );
    }
}