datafusion_common/
error.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! DataFusion error types
19#[cfg(feature = "backtrace")]
20use std::backtrace::{Backtrace, BacktraceStatus};
21
22use std::borrow::Cow;
23use std::collections::VecDeque;
24use std::error::Error;
25use std::fmt::{Display, Formatter};
26use std::io;
27use std::result;
28use std::sync::Arc;
29
30use crate::utils::datafusion_strsim::normalized_levenshtein;
31use crate::utils::quote_identifier;
32use crate::{Column, DFSchema, Diagnostic, TableReference};
33#[cfg(feature = "avro")]
34use apache_avro::Error as AvroError;
35use arrow::error::ArrowError;
36#[cfg(feature = "parquet")]
37use parquet::errors::ParquetError;
38use sqlparser::parser::ParserError;
39use tokio::task::JoinError;
40
/// Result type for operations that could result in a [`DataFusionError`].
///
/// The error type defaults to [`DataFusionError`] but can be overridden.
pub type Result<T, E = DataFusionError> = result::Result<T, E>;

/// Result type for operations that could result in a [`DataFusionError`]
/// that needs to be shared (wrapped in an [`Arc`]).
pub type SharedResult<T> = result::Result<T, Arc<DataFusionError>>;

/// Error type for generic operations that could result in
/// [`DataFusionError::External`].
pub type GenericError = Box<dyn Error + Send + Sync>;
49
/// DataFusion error
#[derive(Debug)]
pub enum DataFusionError {
    /// Error returned by arrow.
    ///
    /// 2nd argument is for optional backtrace
    ArrowError(ArrowError, Option<String>),
    /// Error when reading / writing Parquet data.
    #[cfg(feature = "parquet")]
    ParquetError(ParquetError),
    /// Error when reading Avro data.
    #[cfg(feature = "avro")]
    AvroError(Box<AvroError>),
    /// Error when reading / writing to / from an object_store (e.g. S3 or LocalFile)
    #[cfg(feature = "object_store")]
    ObjectStore(object_store::Error),
    /// Error when an I/O operation fails
    IoError(io::Error),
    /// Error when SQL is syntactically incorrect.
    ///
    /// 2nd argument is for optional backtrace
    SQL(ParserError, Option<String>),
    /// Error when a feature is not yet implemented.
    ///
    /// These errors are sometimes returned for features that are still in
    /// development and are not entirely complete. Often, these errors are
    /// tracked in our issue tracker.
    NotImplemented(String),
    /// Error due to bugs in DataFusion
    ///
    /// This error should not happen in normal usage of DataFusion. It results
    /// from something that wasn't expected/anticipated by the implementation
    /// and that is most likely a bug (the error message even encourages users
    /// to open a bug report). A user should not be able to trigger internal
    /// errors under normal circumstances by feeding in malformed queries, bad
    /// data, etc.
    ///
    /// Note that I/O errors (or any error that happens due to external systems)
    /// do NOT fall under this category. See other variants such as
    /// [`Self::IoError`] and [`Self::External`].
    ///
    /// DataFusion has internal invariants that the compiler is not always able
    /// to check. This error is raised when one of those invariants does not
    /// hold for some reason.
    Internal(String),
    /// Error during planning of the query.
    ///
    /// This error happens when the user provides a bad query or plan, for
    /// example the user attempts to call a function that doesn't exist, or if
    /// the types of a function call are not supported.
    Plan(String),
    /// Error for invalid or unsupported configuration options.
    Configuration(String),
    /// Error when there is a problem with the query related to schema.
    ///
    /// This error can be returned in cases such as when schema inference is not
    /// possible and when column names are not unique.
    ///
    /// 2nd argument is for optional backtrace.
    /// The optional backtrace is boxed to prevent
    /// <https://rust-lang.github.io/rust-clippy/master/index.html#/result_large_err>
    SchemaError(SchemaError, Box<Option<String>>),
    /// Error during execution of the query.
    ///
    /// This error is returned when an error happens during execution due to a
    /// malformed input. For example, the user passed malformed arguments to a
    /// SQL method, opened a CSV file that is broken, or tried to divide an
    /// integer by zero.
    Execution(String),
    /// [`JoinError`] during execution of the query.
    ///
    /// This error can't occur for unjoined tasks, such as execution shutdown.
    ExecutionJoin(JoinError),
    /// Error when resources (such as memory or scratch disk space) are exhausted.
    ///
    /// This error is thrown when a consumer cannot acquire additional memory
    /// or other resources needed to execute the query from the Memory Manager.
    ResourcesExhausted(String),
    /// Errors originating from outside DataFusion's core codebase.
    ///
    /// For example, a custom S3Error from the crate datafusion-objectstore-s3
    External(GenericError),
    /// Error with additional context
    ///
    /// 1st argument is the context description, 2nd argument is the wrapped
    /// error.
    Context(String, Box<DataFusionError>),
    /// Errors from either mapping LogicalPlans to/from Substrait plans
    /// or serializing/deserializing protobytes to Substrait plans
    Substrait(String),
    /// Error wrapped together with additional contextual information intended
    /// for end users, to help them understand what went wrong by providing
    /// human-readable messages, and locations in the source query that relate
    /// to the error in some way.
    Diagnostic(Box<Diagnostic>, Box<DataFusionError>),
    /// A collection of one or more [`DataFusionError`]. Useful in cases where
    /// DataFusion can recover from an erroneous state, and produce more errors
    /// before terminating. e.g. when planning a SELECT clause, DataFusion can
    /// synchronize to the next `SelectItem` if the previous one had errors. The
    /// end result is that the user can see errors about all `SelectItem`,
    /// instead of just the first one.
    Collection(Vec<DataFusionError>),
    /// A [`DataFusionError`] which shares an underlying [`DataFusionError`].
    ///
    /// This is useful when the same underlying [`DataFusionError`] is passed
    /// to multiple receivers. For example, when the source of a repartition
    /// errors and the error is propagated to multiple consumers.
    Shared(Arc<DataFusionError>),
}
155
/// Wraps an error with a context description that also records the source
/// location of the macro invocation.
///
/// Expands to `$err.context(..)`, where the description passed is
/// `"<desc> at <file>:<line>"` built from `file!()` and `line!()`.
#[macro_export]
macro_rules! context {
    ($desc:expr, $err:expr) => {
        $err.context(format!("{} at {}:{}", $desc, file!(), line!()))
    };
}
162
/// Schema-related errors
#[derive(Debug)]
pub enum SchemaError {
    /// Schema contains a (possibly) qualified and unqualified field with the
    /// same unqualified name
    AmbiguousReference { field: Column },
    /// Schema contains a duplicate qualified field name
    DuplicateQualifiedField {
        qualifier: Box<TableReference>,
        name: String,
    },
    /// Schema contains a duplicate unqualified field name
    DuplicateUnqualifiedField { name: String },
    /// No field with this name.
    ///
    /// `valid_fields` lists the known columns; the `Display` implementation
    /// uses it to suggest an alternative name.
    FieldNotFound {
        field: Box<Column>,
        valid_fields: Vec<Column>,
    },
}
181
182impl Display for SchemaError {
183    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
184        match self {
185            Self::FieldNotFound {
186                field,
187                valid_fields,
188            } => {
189                write!(f, "No field named {}", field.quoted_flat_name())?;
190                let lower_valid_fields = valid_fields
191                    .iter()
192                    .map(|column| column.flat_name().to_lowercase())
193                    .collect::<Vec<String>>();
194
195                let valid_fields_names = valid_fields
196                    .iter()
197                    .map(|column| column.flat_name())
198                    .collect::<Vec<String>>();
199                if lower_valid_fields.contains(&field.flat_name().to_lowercase()) {
200                    write!(
201                        f,
202                        ". Column names are case sensitive. You can use double quotes to refer to the \"{}\" column \
203                        or set the datafusion.sql_parser.enable_ident_normalization configuration",
204                        field.quoted_flat_name()
205                    )?;
206                }
207                let field_name = field.name();
208                if let Some(matched) = valid_fields_names
209                    .iter()
210                    .filter(|str| normalized_levenshtein(str, field_name) >= 0.5)
211                    .collect::<Vec<&String>>()
212                    .first()
213                {
214                    write!(f, ". Did you mean '{matched}'?")?;
215                } else if !valid_fields.is_empty() {
216                    write!(
217                        f,
218                        ". Valid fields are {}",
219                        valid_fields
220                            .iter()
221                            .map(|field| field.quoted_flat_name())
222                            .collect::<Vec<String>>()
223                            .join(", ")
224                    )?;
225                }
226                write!(f, ".")
227            }
228            Self::DuplicateQualifiedField { qualifier, name } => {
229                write!(
230                    f,
231                    "Schema contains duplicate qualified field name {}.{}",
232                    qualifier.to_quoted_string(),
233                    quote_identifier(name)
234                )
235            }
236            Self::DuplicateUnqualifiedField { name } => {
237                write!(
238                    f,
239                    "Schema contains duplicate unqualified field name {}",
240                    quote_identifier(name)
241                )
242            }
243            Self::AmbiguousReference { field } => {
244                if field.relation.is_some() {
245                    write!(
246                        f,
247                        "Schema contains qualified field name {} and unqualified field name {} which would be ambiguous",
248                        field.quoted_flat_name(),
249                        quote_identifier(&field.name)
250                    )
251                } else {
252                    write!(
253                        f,
254                        "Ambiguous reference to unqualified field {}",
255                        field.quoted_flat_name()
256                    )
257                }
258            }
259        }
260    }
261}
262
// `SchemaError` carries no underlying cause, so the default `Error` methods
// (in particular `source() == None`) are used as-is.
impl Error for SchemaError {}
264
265impl From<std::fmt::Error> for DataFusionError {
266    fn from(_e: std::fmt::Error) -> Self {
267        DataFusionError::Execution("Fail to format".to_string())
268    }
269}
270
271impl From<io::Error> for DataFusionError {
272    fn from(e: io::Error) -> Self {
273        DataFusionError::IoError(e)
274    }
275}
276
277impl From<ArrowError> for DataFusionError {
278    fn from(e: ArrowError) -> Self {
279        DataFusionError::ArrowError(e, None)
280    }
281}
282
283impl From<DataFusionError> for ArrowError {
284    fn from(e: DataFusionError) -> Self {
285        match e {
286            DataFusionError::ArrowError(e, _) => e,
287            DataFusionError::External(e) => ArrowError::ExternalError(e),
288            other => ArrowError::ExternalError(Box::new(other)),
289        }
290    }
291}
292
293impl From<&Arc<DataFusionError>> for DataFusionError {
294    fn from(e: &Arc<DataFusionError>) -> Self {
295        if let DataFusionError::Shared(e_inner) = e.as_ref() {
296            // don't re-wrap
297            DataFusionError::Shared(Arc::clone(e_inner))
298        } else {
299            DataFusionError::Shared(Arc::clone(e))
300        }
301    }
302}
303
/// Wraps a Parquet error as [`DataFusionError::ParquetError`].
#[cfg(feature = "parquet")]
impl From<ParquetError> for DataFusionError {
    fn from(err: ParquetError) -> Self {
        Self::ParquetError(err)
    }
}
310
/// Boxes an Avro error and wraps it as [`DataFusionError::AvroError`].
#[cfg(feature = "avro")]
impl From<AvroError> for DataFusionError {
    fn from(err: AvroError) -> Self {
        let boxed = Box::new(err);
        Self::AvroError(boxed)
    }
}
317
/// Wraps an object store error as [`DataFusionError::ObjectStore`].
#[cfg(feature = "object_store")]
impl From<object_store::Error> for DataFusionError {
    fn from(err: object_store::Error) -> Self {
        Self::ObjectStore(err)
    }
}
324
/// Converts an object store path error into an `object_store::Error` and
/// wraps it as [`DataFusionError::ObjectStore`].
#[cfg(feature = "object_store")]
impl From<object_store::path::Error> for DataFusionError {
    fn from(err: object_store::path::Error) -> Self {
        let store_error: object_store::Error = err.into();
        Self::ObjectStore(store_error)
    }
}
331
332impl From<ParserError> for DataFusionError {
333    fn from(e: ParserError) -> Self {
334        DataFusionError::SQL(e, None)
335    }
336}
337
338impl From<GenericError> for DataFusionError {
339    fn from(err: GenericError) -> Self {
340        // If the error is already a DataFusionError, not wrapping it.
341        if err.is::<DataFusionError>() {
342            if let Ok(e) = err.downcast::<DataFusionError>() {
343                *e
344            } else {
345                unreachable!()
346            }
347        } else {
348            DataFusionError::External(err)
349        }
350    }
351}
352
353impl Display for DataFusionError {
354    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
355        let error_prefix = self.error_prefix();
356        let message = self.message();
357        write!(f, "{error_prefix}{message}")
358    }
359}
360
361impl Error for DataFusionError {
362    fn source(&self) -> Option<&(dyn Error + 'static)> {
363        match self {
364            DataFusionError::ArrowError(e, _) => Some(e),
365            #[cfg(feature = "parquet")]
366            DataFusionError::ParquetError(e) => Some(e),
367            #[cfg(feature = "avro")]
368            DataFusionError::AvroError(e) => Some(e),
369            #[cfg(feature = "object_store")]
370            DataFusionError::ObjectStore(e) => Some(e),
371            DataFusionError::IoError(e) => Some(e),
372            DataFusionError::SQL(e, _) => Some(e),
373            DataFusionError::NotImplemented(_) => None,
374            DataFusionError::Internal(_) => None,
375            DataFusionError::Configuration(_) => None,
376            DataFusionError::Plan(_) => None,
377            DataFusionError::SchemaError(e, _) => Some(e),
378            DataFusionError::Execution(_) => None,
379            DataFusionError::ExecutionJoin(e) => Some(e),
380            DataFusionError::ResourcesExhausted(_) => None,
381            DataFusionError::External(e) => Some(e.as_ref()),
382            DataFusionError::Context(_, e) => Some(e.as_ref()),
383            DataFusionError::Substrait(_) => None,
384            DataFusionError::Diagnostic(_, e) => Some(e.as_ref()),
385            // Can't really make a Collection fit into the mold of "an error has
386            // at most one source", but returning the first one is probably good
387            // idea. Especially since `DataFusionError::Collection` is mostly
388            // meant for consumption by the end user, so shouldn't interfere
389            // with programmatic usage too much. Plus, having 1 or 5 errors
390            // doesn't really change the fact that the query is invalid and
391            // can't be executed.
392            DataFusionError::Collection(errs) => errs.first().map(|e| e as &dyn Error),
393            DataFusionError::Shared(e) => Some(e.as_ref()),
394        }
395    }
396}
397
398impl From<DataFusionError> for io::Error {
399    fn from(e: DataFusionError) -> Self {
400        io::Error::other(e)
401    }
402}
403
404impl DataFusionError {
    /// The separator between the error message and the backtrace.
    ///
    /// [`Self::get_back_trace`] prefixes a captured backtrace with it and
    /// [`Self::strip_backtrace`] splits the rendered message on it.
    pub const BACK_TRACE_SEP: &'static str = "\n\nbacktrace: ";
407
408    /// Get deepest underlying [`DataFusionError`]
409    ///
410    /// [`DataFusionError`]s sometimes form a chain, such as `DataFusionError::ArrowError()` in order to conform
411    /// to the correct error signature. Thus sometimes there is a chain several layers deep that can obscure the
412    /// original error. This function finds the lowest level DataFusionError possible.
413    ///
414    /// For example,  `find_root` will return`DataFusionError::ResourceExhausted` given the input
415    /// ```text
416    /// DataFusionError::ArrowError
417    ///   ArrowError::External
418    ///    Box(DataFusionError::Context)
419    ///      DataFusionError::ResourceExhausted
420    /// ```
421    ///
422    /// This may be the same as `self`.
423    pub fn find_root(&self) -> &Self {
424        // Note: This is a non-recursive algorithm so we do not run
425        // out of stack space, even for long error chains.
426
427        let mut last_datafusion_error = self;
428        let mut root_error: &dyn Error = self;
429        while let Some(source) = root_error.source() {
430            // walk the next level
431            root_error = source;
432            // remember the lowest datafusion error so far
433            if let Some(e) = root_error.downcast_ref::<DataFusionError>() {
434                last_datafusion_error = e;
435            } else if let Some(e) = root_error.downcast_ref::<Arc<DataFusionError>>() {
436                // As `Arc<T>::source()` calls through to `T::source()` we need to
437                // explicitly match `Arc<DataFusionError>` to capture it
438                last_datafusion_error = e.as_ref();
439            }
440        }
441        // return last checkpoint (which may be the original error)
442        last_datafusion_error
443    }
444
445    /// wraps self in Self::Context with a description
446    pub fn context(self, description: impl Into<String>) -> Self {
447        Self::Context(description.into(), Box::new(self))
448    }
449
450    /// Strips backtrace out of the error message
451    /// If backtrace enabled then error has a format "message" [`Self::BACK_TRACE_SEP`] "backtrace"
452    /// The method strips the backtrace and outputs "message"
453    pub fn strip_backtrace(&self) -> String {
454        self.to_string()
455            .split(Self::BACK_TRACE_SEP)
456            .collect::<Vec<&str>>()
457            .first()
458            .unwrap_or(&"")
459            .to_string()
460    }
461
462    /// To enable optional rust backtrace in DataFusion:
463    /// - [`Setup Env Variables`]<https://doc.rust-lang.org/std/backtrace/index.html#environment-variables>
464    /// - Enable `backtrace` cargo feature
465    ///
466    /// Example:
467    /// cargo build --features 'backtrace'
468    /// RUST_BACKTRACE=1 ./app
469    #[inline(always)]
470    pub fn get_back_trace() -> String {
471        #[cfg(feature = "backtrace")]
472        {
473            let back_trace = Backtrace::capture();
474            if back_trace.status() == BacktraceStatus::Captured {
475                return format!("{}{}", Self::BACK_TRACE_SEP, back_trace);
476            }
477
478            "".to_owned()
479        }
480
481        #[cfg(not(feature = "backtrace"))]
482        "".to_owned()
483    }
484
485    /// Return a [`DataFusionErrorBuilder`] to build a [`DataFusionError`]
486    pub fn builder() -> DataFusionErrorBuilder {
487        DataFusionErrorBuilder::default()
488    }
489
490    fn error_prefix(&self) -> &'static str {
491        match self {
492            DataFusionError::ArrowError(_, _) => "Arrow error: ",
493            #[cfg(feature = "parquet")]
494            DataFusionError::ParquetError(_) => "Parquet error: ",
495            #[cfg(feature = "avro")]
496            DataFusionError::AvroError(_) => "Avro error: ",
497            #[cfg(feature = "object_store")]
498            DataFusionError::ObjectStore(_) => "Object Store error: ",
499            DataFusionError::IoError(_) => "IO error: ",
500            DataFusionError::SQL(_, _) => "SQL error: ",
501            DataFusionError::NotImplemented(_) => {
502                "This feature is not implemented: "
503            }
504            DataFusionError::Internal(_) => "Internal error: ",
505            DataFusionError::Plan(_) => "Error during planning: ",
506            DataFusionError::Configuration(_) => {
507                "Invalid or Unsupported Configuration: "
508            }
509            DataFusionError::SchemaError(_, _) => "Schema error: ",
510            DataFusionError::Execution(_) => "Execution error: ",
511            DataFusionError::ExecutionJoin(_) => "ExecutionJoin error: ",
512            DataFusionError::ResourcesExhausted(_) => {
513                "Resources exhausted: "
514            }
515            DataFusionError::External(_) => "External error: ",
516            DataFusionError::Context(_, _) => "",
517            DataFusionError::Substrait(_) => "Substrait error: ",
518            DataFusionError::Diagnostic(_, _) => "",
519            DataFusionError::Collection(errs) => {
520                errs.first().expect("cannot construct DataFusionError::Collection with 0 errors, but got one such case").error_prefix()
521            }
522            DataFusionError::Shared(_) => "",
523        }
524    }
525
526    pub fn message(&self) -> Cow<str> {
527        match *self {
528            DataFusionError::ArrowError(ref desc, ref backtrace) => {
529                let backtrace = backtrace.clone().unwrap_or_else(|| "".to_owned());
530                Cow::Owned(format!("{desc}{backtrace}"))
531            }
532            #[cfg(feature = "parquet")]
533            DataFusionError::ParquetError(ref desc) => Cow::Owned(desc.to_string()),
534            #[cfg(feature = "avro")]
535            DataFusionError::AvroError(ref desc) => Cow::Owned(desc.to_string()),
536            DataFusionError::IoError(ref desc) => Cow::Owned(desc.to_string()),
537            DataFusionError::SQL(ref desc, ref backtrace) => {
538                let backtrace: String =
539                    backtrace.clone().unwrap_or_else(|| "".to_owned());
540                Cow::Owned(format!("{desc:?}{backtrace}"))
541            }
542            DataFusionError::Configuration(ref desc) => Cow::Owned(desc.to_string()),
543            DataFusionError::NotImplemented(ref desc) => Cow::Owned(desc.to_string()),
544            DataFusionError::Internal(ref desc) => Cow::Owned(format!(
545                "{desc}.\nThis was likely caused by a bug in DataFusion's \
546            code and we would welcome that you file an bug report in our issue tracker"
547            )),
548            DataFusionError::Plan(ref desc) => Cow::Owned(desc.to_string()),
549            DataFusionError::SchemaError(ref desc, ref backtrace) => {
550                let backtrace: &str =
551                    &backtrace.as_ref().clone().unwrap_or_else(|| "".to_owned());
552                Cow::Owned(format!("{desc}{backtrace}"))
553            }
554            DataFusionError::Execution(ref desc) => Cow::Owned(desc.to_string()),
555            DataFusionError::ExecutionJoin(ref desc) => Cow::Owned(desc.to_string()),
556            DataFusionError::ResourcesExhausted(ref desc) => Cow::Owned(desc.to_string()),
557            DataFusionError::External(ref desc) => Cow::Owned(desc.to_string()),
558            #[cfg(feature = "object_store")]
559            DataFusionError::ObjectStore(ref desc) => Cow::Owned(desc.to_string()),
560            DataFusionError::Context(ref desc, ref err) => {
561                Cow::Owned(format!("{desc}\ncaused by\n{}", *err))
562            }
563            DataFusionError::Substrait(ref desc) => Cow::Owned(desc.to_string()),
564            DataFusionError::Diagnostic(_, ref err) => Cow::Owned(err.to_string()),
565            // Returning the message of the first error is probably fine enough,
566            // and makes `DataFusionError::Collection` a transparent wrapped,
567            // unless the end user explicitly calls `DataFusionError::iter`.
568            DataFusionError::Collection(ref errs) => errs
569                .first()
570                .expect("cannot construct DataFusionError::Collection with 0 errors")
571                .message(),
572            DataFusionError::Shared(ref desc) => Cow::Owned(desc.to_string()),
573        }
574    }
575
576    /// Wraps the error with contextual information intended for end users
577    pub fn with_diagnostic(self, diagnostic: Diagnostic) -> Self {
578        Self::Diagnostic(Box::new(diagnostic), Box::new(self))
579    }
580
581    /// Wraps the error with contextual information intended for end users.
582    /// Takes a function that inspects the error and returns the diagnostic to
583    /// wrap it with.
584    pub fn with_diagnostic_fn<F: FnOnce(&DataFusionError) -> Diagnostic>(
585        self,
586        f: F,
587    ) -> Self {
588        let diagnostic = f(&self);
589        self.with_diagnostic(diagnostic)
590    }
591
592    /// Gets the [`Diagnostic`] associated with the error, if any. If there is
593    /// more than one, only the outermost [`Diagnostic`] is returned.
594    pub fn diagnostic(&self) -> Option<&Diagnostic> {
595        struct DiagnosticsIterator<'a> {
596            head: &'a DataFusionError,
597        }
598
599        impl<'a> Iterator for DiagnosticsIterator<'a> {
600            type Item = &'a Diagnostic;
601
602            fn next(&mut self) -> Option<Self::Item> {
603                loop {
604                    if let DataFusionError::Diagnostic(diagnostics, source) = self.head {
605                        self.head = source.as_ref();
606                        return Some(diagnostics);
607                    }
608
609                    if let Some(source) = self
610                        .head
611                        .source()
612                        .and_then(|source| source.downcast_ref::<DataFusionError>())
613                    {
614                        self.head = source;
615                    } else {
616                        return None;
617                    }
618                }
619            }
620        }
621
622        DiagnosticsIterator { head: self }.next()
623    }
624
625    /// Return an iterator over this [`DataFusionError`] and any other
626    /// [`DataFusionError`]s in a [`DataFusionError::Collection`].
627    ///
628    /// Sometimes DataFusion is able to collect multiple errors in a SQL query
629    /// before terminating, e.g. across different expressions in a SELECT
630    /// statements or different sides of a UNION. This method returns an
631    /// iterator over all the errors in the collection.
632    ///
633    /// For this to work, the top-level error must be a
634    /// `DataFusionError::Collection`, not something that contains it.
635    pub fn iter(&self) -> impl Iterator<Item = &DataFusionError> {
636        struct ErrorIterator<'a> {
637            queue: VecDeque<&'a DataFusionError>,
638        }
639
640        impl<'a> Iterator for ErrorIterator<'a> {
641            type Item = &'a DataFusionError;
642
643            fn next(&mut self) -> Option<Self::Item> {
644                loop {
645                    let popped = self.queue.pop_front()?;
646                    match popped {
647                        DataFusionError::Collection(errs) => self.queue.extend(errs),
648                        _ => return Some(popped),
649                    }
650                }
651            }
652        }
653
654        let mut queue = VecDeque::new();
655        queue.push_back(self);
656        ErrorIterator { queue }
657    }
658}
659
/// A builder for [`DataFusionError`]
///
/// This builder can be used to collect multiple errors and return them as a
/// [`DataFusionError::Collection`].
///
/// # Example: no errors
/// ```
/// # use datafusion_common::DataFusionError;
/// let mut builder = DataFusionError::builder();
/// // error_or returns the value if no errors have been added
/// assert_eq!(builder.error_or(42).unwrap(), 42);
/// ```
///
/// # Example: with errors
/// ```
/// # use datafusion_common::{assert_contains, DataFusionError};
/// let mut builder = DataFusionError::builder();
/// builder.add_error(DataFusionError::Internal("foo".to_owned()));
/// // error_or returns an error if any errors have been added
/// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo");
/// ```
#[derive(Debug, Default)]
pub struct DataFusionErrorBuilder(Vec<DataFusionError>);
683
684impl DataFusionErrorBuilder {
685    /// Create a new [`DataFusionErrorBuilder`]
686    pub fn new() -> Self {
687        Default::default()
688    }
689
690    /// Add an error to the in progress list
691    ///
692    /// # Example
693    /// ```
694    /// # use datafusion_common::{assert_contains, DataFusionError};
695    /// let mut builder = DataFusionError::builder();
696    /// builder.add_error(DataFusionError::Internal("foo".to_owned()));
697    /// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo");
698    /// ```
699    pub fn add_error(&mut self, error: DataFusionError) {
700        self.0.push(error);
701    }
702
703    /// Add an error to the in progress list, returning the builder
704    ///
705    /// # Example
706    /// ```
707    /// # use datafusion_common::{assert_contains, DataFusionError};
708    /// let builder = DataFusionError::builder()
709    ///   .with_error(DataFusionError::Internal("foo".to_owned()));
710    /// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo");
711    /// ```
712    pub fn with_error(mut self, error: DataFusionError) -> Self {
713        self.0.push(error);
714        self
715    }
716
717    /// Returns `Ok(ok)` if no errors were added to the builder,
718    /// otherwise returns a `Result::Err`
719    pub fn error_or<T>(self, ok: T) -> Result<T, DataFusionError> {
720        match self.0.len() {
721            0 => Ok(ok),
722            1 => Err(self.0.into_iter().next().expect("length matched 1")),
723            _ => Err(DataFusionError::Collection(self.0)),
724        }
725    }
726}
727
/// Unwrap an `Option` if possible. Otherwise return a `DataFusionError::Internal`.
/// In normal usage of DataFusion the unwrap should always succeed.
///
/// The macro expands to an expression ending in `?`, so it must be used inside
/// a function whose error type can be built from a `DataFusionError`. The
/// error message is `"<identifier> should not be None"`.
///
/// All paths in the expansion are absolute (`$crate::` / `::std::`), so the
/// caller does not need `DataFusionError` in scope.
///
/// Example: `let values = unwrap_or_internal_err!(values)`
#[macro_export]
macro_rules! unwrap_or_internal_err {
    ($Value: ident) => {
        $Value.ok_or_else(|| {
            $crate::DataFusionError::Internal(::std::format!(
                "{} should not be None",
                ::std::stringify!($Value)
            ))
        })?
    };
}
743
/// Defines a pair of exported macros for concise declaration of one
/// string-carrying `DataFusionError::*` variant.
///
/// The generated macros take the same arguments as `format!` (a trailing comma
/// is accepted), optionally followed by `; diagnostic = <expr>` to attach a
/// diagnostic through `with_diagnostic`:
///
/// ```text
/// plan_err!("Error")
/// plan_err!("Error {}", val)
/// plan_err!("Error {:?}", val)
/// plan_err!("Error {val}")
/// plan_err!("Error {val:?}")
/// plan_err!("Error {val}"; diagnostic = diag)
/// ```
///
/// * `NAME_ERR` - name of the macro wrapping `Err(DataFusionError::*)`
/// * `NAME_DF_ERR` - name of the macro wrapping `DataFusionError::*`. Needed to
///   keep the backtrace opportunity in constructions where `DataFusionError::*`
///   is used directly, like `map_err`, `ok_or_else`, etc
/// * `ERR` - the `DataFusionError` variant to build
macro_rules! make_error {
    // Entry point: pass a literal `$` token along so the generated macros can
    // declare metavariables of their own.
    ($NAME_ERR:ident, $NAME_DF_ERR:ident, $ERR:ident) => {
        make_error!(@inner ($), $NAME_ERR, $NAME_DF_ERR, $ERR);
    };
    (@inner ($d:tt), $NAME_ERR:ident, $NAME_DF_ERR:ident, $ERR:ident) => {
        ::paste::paste! {
            /// Macro wraps `$ERR` to add backtrace feature
            #[macro_export]
            macro_rules! $NAME_DF_ERR {
                ($d($d args:expr),* $d(,)? $d(; diagnostic = $d DIAG:expr)?) => {{
                    // The formatted message is followed by whatever
                    // `get_back_trace()` returns.
                    let err = $crate::DataFusionError::$ERR(
                        ::std::format!(
                            "{}{}",
                            ::std::format!($d($d args),*),
                            $crate::DataFusionError::get_back_trace(),
                        ).into()
                    );
                    $d (
                        let err = err.with_diagnostic($d DIAG);
                    )?
                    err
                }};
            }

            /// Macro wraps Err(`$ERR`) to add backtrace feature
            #[macro_export]
            macro_rules! $NAME_ERR {
                ($d($d args:expr),* $d(,)? $d(; diagnostic = $d DIAG:expr)?) => {{
                    // Goes through the `_`-prefixed re-export of the sibling macro.
                    let err = $crate::[<_ $NAME_DF_ERR>]!($d($d args),*);
                    $d (
                        let err = err.with_diagnostic($d DIAG);
                    )?
                    ::std::result::Result::Err(err)
                }};
            }

            // Note: only some of the generated macros are used inside this crate.
            // A `_`-prefixed re-export is generated for every one of them in case
            // it is needed, so unused code is allowed to avoid warnings for the
            // ones that are not used.
            #[doc(hidden)]
            #[allow(unused)]
            pub use $NAME_ERR as [<_ $NAME_ERR>];
            #[doc(hidden)]
            #[allow(unused)]
            pub use $NAME_DF_ERR as [<_ $NAME_DF_ERR>];
        }
    };
}
805
806// Exposes a macro to create `DataFusionError::Plan` with optional backtrace
807make_error!(plan_err, plan_datafusion_err, Plan);
808
809// Exposes a macro to create `DataFusionError::Internal` with optional backtrace
810make_error!(internal_err, internal_datafusion_err, Internal);
811
812// Exposes a macro to create `DataFusionError::NotImplemented` with optional backtrace
813make_error!(not_impl_err, not_impl_datafusion_err, NotImplemented);
814
815// Exposes a macro to create `DataFusionError::Execution` with optional backtrace
816make_error!(exec_err, exec_datafusion_err, Execution);
817
818// Exposes a macro to create `DataFusionError::Configuration` with optional backtrace
819make_error!(config_err, config_datafusion_err, Configuration);
820
821// Exposes a macro to create `DataFusionError::Substrait` with optional backtrace
822make_error!(substrait_err, substrait_datafusion_err, Substrait);
823
824// Exposes a macro to create `DataFusionError::ResourcesExhausted` with optional backtrace
825make_error!(resources_err, resources_datafusion_err, ResourcesExhausted);
826
/// Exposes a macro to create `DataFusionError::SQL` with optional backtrace.
///
/// `$ERR` becomes the first field of the variant; the second field is
/// `Some(DataFusionError::get_back_trace())`. An optional
/// `; diagnostic = <expr>` suffix attaches a diagnostic via `with_diagnostic`.
///
/// The error type is named through `$crate`, so the caller does not need to
/// have `DataFusionError` imported.
#[macro_export]
macro_rules! sql_datafusion_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        let err = $crate::DataFusionError::SQL(
            $ERR,
            ::std::option::Option::Some($crate::DataFusionError::get_back_trace()),
        );
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        err
    }};
}
838
/// Exposes a macro to create `Err(DataFusionError::SQL)` with optional backtrace.
///
/// The error itself is built by `sql_datafusion_err!`, reached through
/// `$crate` so the macro works no matter under which name (if any) the
/// defining crate is visible at the call site. An optional
/// `; diagnostic = <expr>` suffix attaches a diagnostic via `with_diagnostic`.
#[macro_export]
macro_rules! sql_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        let err = $crate::sql_datafusion_err!($ERR);
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        ::std::result::Result::Err(err)
    }};
}
850
/// Exposes a macro to create `DataFusionError::ArrowError` with optional backtrace.
///
/// `$ERR` becomes the first field of the variant; the second field is
/// `Some(DataFusionError::get_back_trace())`. An optional
/// `; diagnostic = <expr>` suffix attaches a diagnostic via `with_diagnostic`.
///
/// The error type is named through `$crate`, so the caller does not need to
/// have `DataFusionError` imported.
#[macro_export]
macro_rules! arrow_datafusion_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        let err = $crate::DataFusionError::ArrowError(
            $ERR,
            ::std::option::Option::Some($crate::DataFusionError::get_back_trace()),
        );
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        err
    }};
}
862
/// Exposes a macro to create `Err(DataFusionError::ArrowError)` with optional backtrace.
///
/// The error itself is built by `arrow_datafusion_err!`, reached through
/// `$crate` so the macro works no matter under which name (if any) the
/// defining crate is visible at the call site. An optional
/// `; diagnostic = <expr>` suffix attaches a diagnostic via `with_diagnostic`.
#[macro_export]
macro_rules! arrow_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        let err = $crate::arrow_datafusion_err!($ERR);
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        ::std::result::Result::Err(err)
    }};
}
875
/// Exposes a macro to create `DataFusionError::SchemaError` with optional backtrace.
///
/// `$ERR` becomes the first field of the variant; the second field is a boxed
/// `Some(DataFusionError::get_back_trace())`. An optional
/// `; diagnostic = <expr>` suffix attaches a diagnostic via `with_diagnostic`.
///
/// Every name in the expansion is fully qualified, so the macro does not depend
/// on what `Box`, `Some` or `DataFusionError` mean at the call site.
#[macro_export]
macro_rules! schema_datafusion_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        let err = $crate::error::DataFusionError::SchemaError(
            $ERR,
            ::std::boxed::Box::new(::std::option::Option::Some(
                $crate::error::DataFusionError::get_back_trace(),
            )),
        );
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        err
    }};
}
890
/// Exposes a macro to create `Err(DataFusionError::SchemaError)` with optional backtrace.
///
/// `$ERR` becomes the first field of the variant; the second field is a boxed
/// `Some(DataFusionError::get_back_trace())`. An optional
/// `; diagnostic = <expr>` suffix attaches a diagnostic via `with_diagnostic`
/// before the error is wrapped in `Err`.
///
/// Every name in the expansion is fully qualified, so the macro does not depend
/// on what `Box`, `Some`, `Err` or `DataFusionError` mean at the call site.
#[macro_export]
macro_rules! schema_err {
    ($ERR:expr $(; diagnostic = $DIAG:expr)?) => {{
        let err = $crate::error::DataFusionError::SchemaError(
            $ERR,
            ::std::boxed::Box::new(::std::option::Option::Some(
                $crate::error::DataFusionError::get_back_trace(),
            )),
        );
        $(
            let err = err.with_diagnostic($DIAG);
        )?
        ::std::result::Result::Err(err)
    }};
}
906
907// To avoid compiler error when using macro in the same crate:
908// macros from the current crate cannot be referred to by absolute paths
909pub use schema_err as _schema_err;
910
911/// Create a "field not found" DataFusion::SchemaError
912pub fn field_not_found<R: Into<TableReference>>(
913    qualifier: Option<R>,
914    name: &str,
915    schema: &DFSchema,
916) -> DataFusionError {
917    schema_datafusion_err!(SchemaError::FieldNotFound {
918        field: Box::new(Column::new(qualifier, name)),
919        valid_fields: schema.columns().to_vec(),
920    })
921}
922
923/// Convenience wrapper over [`field_not_found`] for when there is no qualifier
924pub fn unqualified_field_not_found(name: &str, schema: &DFSchema) -> DataFusionError {
925    schema_datafusion_err!(SchemaError::FieldNotFound {
926        field: Box::new(Column::new_unqualified(name)),
927        valid_fields: schema.columns().to_vec(),
928    })
929}
930
931pub fn add_possible_columns_to_diag(
932    diagnostic: &mut Diagnostic,
933    field: &Column,
934    valid_fields: &[Column],
935) {
936    let field_names: Vec<String> = valid_fields
937        .iter()
938        .filter_map(|f| {
939            if normalized_levenshtein(f.name(), field.name()) >= 0.5 {
940                Some(f.flat_name())
941            } else {
942                None
943            }
944        })
945        .collect();
946
947    for name in field_names {
948        diagnostic.add_note(format!("possible column {name}"), None);
949    }
950}
951
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use crate::error::{DataFusionError, GenericError};
    use arrow::error::ArrowError;

    /// A `DataFusionError` converted into an `ArrowError` keeps its message
    /// behind the "External error" prefix.
    #[test]
    fn datafusion_error_to_arrow() {
        let rendered = return_arrow_error().unwrap_err().to_string();
        assert!(rendered.starts_with("External error: Error during planning: foo"));
    }

    /// An `ArrowError` converted into a `DataFusionError` is rendered behind
    /// the "Arrow error" prefix.
    #[test]
    fn arrow_error_to_datafusion() {
        let converted = return_datafusion_error().unwrap_err();
        assert_eq!(
            converted.strip_backtrace(),
            "Arrow error: Schema error: bar"
        );
    }

    // To pass the test the environment variable RUST_BACKTRACE should be set to 1 to enforce backtrace
    #[cfg(feature = "backtrace")]
    #[test]
    #[allow(clippy::unnecessary_literal_unwrap)]
    fn test_enabled_backtrace() {
        let requested = std::env::var("RUST_BACKTRACE");
        assert!(
            matches!(requested.as_deref(), Ok("1")),
            "Environment variable RUST_BACKTRACE must be set to 1"
        );

        let planned: Result<(), DataFusionError> = plan_err!("Err");
        let rendered = planned.unwrap_err().to_string();
        assert!(rendered.contains(DataFusionError::BACK_TRACE_SEP));

        // First section is the message itself, second one the backtrace text.
        let mut sections = rendered.split(DataFusionError::BACK_TRACE_SEP);
        assert_eq!(sections.next().unwrap(), "Error during planning: Err");
        assert!(!sections.next().unwrap().is_empty());
    }

    #[cfg(not(feature = "backtrace"))]
    #[test]
    #[allow(clippy::unnecessary_literal_unwrap)]
    fn test_disabled_backtrace() {
        let planned: Result<(), DataFusionError> = plan_err!("Err");
        let rendered = planned.unwrap_err().to_string();
        assert!(!rendered.contains(DataFusionError::BACK_TRACE_SEP));
        assert_eq!(rendered, "Error during planning: Err");
    }

    /// `find_root` must reach the innermost `ResourcesExhausted` error through
    /// every kind of wrapping listed below.
    #[test]
    fn test_find_root_error() {
        let root = || DataFusionError::ResourcesExhausted("foo".to_string());

        let wrapped = vec![
            // context around the root
            DataFusionError::Context("it happened!".to_string(), Box::new(root())),
            // root carried as an external error inside an arrow error
            DataFusionError::ArrowError(ArrowError::ExternalError(Box::new(root())), None),
            // root boxed directly as an external error
            DataFusionError::External(Box::new(root())),
            // external -> arrow external -> root
            DataFusionError::External(Box::new(ArrowError::ExternalError(Box::new(
                root(),
            )))),
            // arrow -> arrow external -> arrow external -> root
            DataFusionError::ArrowError(
                ArrowError::ExternalError(Box::new(ArrowError::ExternalError(Box::new(
                    root(),
                )))),
                None,
            ),
            // external -> Arc -> root
            DataFusionError::External(Box::new(Arc::new(root()))),
            // external -> Arc -> arrow external -> root
            DataFusionError::External(Box::new(Arc::new(ArrowError::ExternalError(
                Box::new(root()),
            )))),
        ];

        for error in wrapped {
            do_root_test(error, root());
        }
    }

    #[test]
    #[allow(clippy::unnecessary_literal_unwrap)]
    fn test_make_error_parse_input() {
        // Renders the error produced by a `plan_err!` call without its backtrace.
        let message =
            |res: Result<(), DataFusionError>| res.unwrap_err().strip_backtrace();

        let extra1 = "extra1";
        let extra2 = "extra2";

        // no arguments
        assert_eq!(message(plan_err!("Err")), "Error during planning: Err");

        // positional arguments
        assert_eq!(
            message(plan_err!("Err {} {}", extra1, extra2)),
            "Error during planning: Err extra1 extra2"
        );

        // positional arguments with debug formatting
        assert_eq!(
            message(plan_err!("Err {:?} {:#?}", extra1, extra2)),
            "Error during planning: Err \"extra1\" \"extra2\""
        );

        // inline (captured) arguments
        assert_eq!(
            message(plan_err!("Err {extra1} {extra2}")),
            "Error during planning: Err extra1 extra2"
        );

        // inline (captured) arguments with debug formatting
        assert_eq!(
            message(plan_err!("Err {extra1:?} {extra2:#?}")),
            "Error during planning: Err \"extra1\" \"extra2\""
        );
    }

    #[test]
    fn external_error() {
        // assert not wrapping DataFusionError
        let boxed_plan: GenericError =
            Box::new(DataFusionError::Plan("test".to_string()));
        let from_plan = DataFusionError::from(boxed_plan);
        println!("{}", from_plan.strip_backtrace());
        assert_eq!(from_plan.strip_backtrace(), "Error during planning: test");

        // assert wrapping other Error
        let boxed_io: GenericError = Box::new(std::io::Error::other("io error"));
        let from_io = DataFusionError::from(boxed_io);
        println!("{}", from_io.strip_backtrace());
        assert_eq!(from_io.strip_backtrace(), "External error: io error");
    }

    /// Converting an already converted external error a second time must not
    /// add another "External error" layer to the message.
    #[test]
    fn external_error_no_recursive() {
        let boxed_io: GenericError = Box::new(std::io::Error::other("io error"));
        let first_pass = DataFusionError::from(boxed_io);
        let reboxed: GenericError = Box::new(first_pass);
        let second_pass = DataFusionError::from(reboxed);

        println!("{second_pass}");
        let rendered = second_pass.to_string();
        assert!(rendered.starts_with("External error: io error"));
    }

    /// Model what happens when implementing SendableRecordBatchStream:
    /// DataFusion code needs to return an ArrowError
    fn return_arrow_error() -> arrow::error::Result<()> {
        // Expect the conversion used by '?' to work
        let failure = DataFusionError::Plan("foo".to_string());
        Err(failure.into())
    }

    /// Model what happens when using arrow kernels in DataFusion
    /// code: need to turn an ArrowError into a DataFusionError
    fn return_datafusion_error() -> crate::error::Result<()> {
        // Expect the conversion used by '?' to work
        let failure = ArrowError::SchemaError("bar".to_string());
        Err(failure.into())
    }

    /// Asserts that the root of `e` renders like `exp` and is the same variant.
    fn do_root_test(e: DataFusionError, exp: DataFusionError) {
        let root = e.find_root();

        // DataFusionError does not implement Eq, so we use a string comparison + some cheap "same variant" test instead
        assert_eq!(root.strip_backtrace(), exp.strip_backtrace());
        assert_eq!(std::mem::discriminant(root), std::mem::discriminant(&exp));
    }

    /// `iter` flattens nested `Collection`s into their leaf errors, in order.
    #[test]
    fn test_iter() {
        let plan = |msg: &str| DataFusionError::Plan(msg.to_string());
        let err = DataFusionError::Collection(vec![
            plan("a"),
            DataFusionError::Collection(vec![plan("b"), plan("c")]),
        ]);

        let messages: Vec<_> = err.iter().map(|e| e.strip_backtrace()).collect();
        assert_eq!(
            messages,
            [
                "Error during planning: a",
                "Error during planning: b",
                "Error during planning: c",
            ]
        );
    }
}