Skip to main content

datafusion_common/
lib.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#![doc(
19    html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg",
20    html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg"
21)]
22#![cfg_attr(docsrs, feature(doc_cfg))]
23// Make sure fast / cheap clones on Arc are explicit:
24// https://github.com/apache/datafusion/issues/11143
25#![deny(clippy::clone_on_ref_ptr)]
26#![cfg_attr(test, allow(clippy::needless_pass_by_value))]
27
28mod column;
29mod dfschema;
30mod functional_dependencies;
31mod join_type;
32mod param_value;
33mod schema_reference;
34mod table_reference;
35mod unnest;
36
37pub mod alias;
38pub mod cast;
39pub mod config;
40pub mod cse;
41pub mod datatype;
42pub mod diagnostic;
43pub mod display;
44pub mod encryption;
45pub mod error;
46pub mod extensions;
47pub mod file_options;
48pub mod format;
49pub mod hash_utils;
50pub mod heap_size;
51pub mod instant;
52pub mod metadata;
53pub mod nested_struct;
54mod null_equality;
55pub mod parquet_config;
56pub mod parsers;
57pub mod pruning;
58pub mod rounding;
59pub mod scalar;
60pub mod spans;
61pub mod stats;
62pub mod test_util;
63pub mod tree_node;
64pub mod types;
65pub mod utils;
66
67/// Reexport arrow crate
68pub use arrow;
69pub use column::Column;
70pub use dfschema::{
71    DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema, qualified_name,
72};
73pub use diagnostic::Diagnostic;
74pub use display::human_readable::{
75    human_readable_count, human_readable_duration, human_readable_size, units,
76};
77pub use error::{
78    DataFusionError, Result, SchemaError, SharedResult, field_not_found,
79    unqualified_field_not_found,
80};
81pub use file_options::file_type::{
82    DEFAULT_ARROW_EXTENSION, DEFAULT_AVRO_EXTENSION, DEFAULT_CSV_EXTENSION,
83    DEFAULT_JSON_EXTENSION, DEFAULT_PARQUET_EXTENSION, GetExt,
84};
85pub use functional_dependencies::{
86    Constraint, Constraints, Dependency, FunctionalDependence, FunctionalDependencies,
87    aggregate_functional_dependencies, get_required_group_by_exprs_indices,
88    get_required_sort_exprs_indices, get_target_functional_dependencies,
89};
90use hashbrown::DefaultHashBuilder;
91pub use join_type::{JoinConstraint, JoinSide, JoinType};
92pub use nested_struct::cast_column;
93pub use null_equality::NullEquality;
94pub use param_value::ParamValues;
95pub use scalar::{ScalarType, ScalarValue};
96pub use schema_reference::SchemaReference;
97pub use spans::{Location, Span, Spans};
98pub use stats::{ColumnStatistics, Statistics};
99pub use table_reference::{ResolvedTableReference, TableReference};
100pub use unnest::{RecursionUnnestOption, UnnestOptions};
101pub use utils::project_schema;
102
103// These are hidden from docs purely to avoid polluting the public view of what this crate exports.
104// These are just re-exports of macros by the same name, which gets around the 'cannot refer to
105// macro-expanded macro_export macros by their full path' error.
106// The design to get around this comes from this comment:
107// https://github.com/rust-lang/rust/pull/52234#issuecomment-976702997
108#[doc(hidden)]
109pub use error::{
110    _config_datafusion_err, _exec_datafusion_err, _ffi_datafusion_err,
111    _internal_datafusion_err, _not_impl_datafusion_err, _plan_datafusion_err,
112    _resources_datafusion_err, _substrait_datafusion_err,
113};
114
115// The HashMap and HashSet implementations that should be used as the uniform defaults
116pub type HashMap<K, V, S = DefaultHashBuilder> = hashbrown::HashMap<K, V, S>;
117pub type HashSet<T, S = DefaultHashBuilder> = hashbrown::HashSet<T, S>;
118pub mod hash_map {
119    pub use hashbrown::hash_map::Entry;
120    pub use hashbrown::hash_map::EntryRef;
121}
122pub mod hash_set {
123    pub use hashbrown::hash_set::Entry;
124}
125
/// Downcast an Arrow array to a concrete array type.
///
/// The macro expands to an expression that yields the result of
/// `downcast_array_helper` unwrapped with `?`. When the cast is not possible a
/// `DataFusionError::Internal` is propagated, so the macro can only be used
/// where `?` is allowed. In normal usage of DataFusion the downcast should
/// always succeed.
///
/// Two forms are accepted:
/// - `downcast_value!(values, Int32Array)`: the target type is a single identifier
/// - `downcast_value!(values, SomeArray, T)`: the target type is `SomeArray<T>`
///
/// Example: `let array = downcast_value!(values, Int32Array)`
#[macro_export]
macro_rules! downcast_value {
    // Target type named by a single identifier.
    ($array:expr, $array_type:ident) => {{
        use $crate::__private::DowncastArrayHelper;
        $array.downcast_array_helper::<$array_type>()?
    }};
    // Target type with one generic argument, i.e. `$array_type<$param>`.
    ($array:expr, $array_type:ident, $param:tt) => {{
        use $crate::__private::DowncastArrayHelper;
        $array.downcast_array_helper::<$array_type<$param>>()?
    }};
}
141
142// Not public API.
143#[doc(hidden)]
144pub mod __private {
145    use crate::Result;
146    use crate::error::_internal_datafusion_err;
147    use arrow::array::Array;
148    use std::any::{Any, type_name};
149
150    #[doc(hidden)]
151    pub trait DowncastArrayHelper {
152        fn downcast_array_helper<U: Any>(&self) -> Result<&U>;
153    }
154
155    impl<T: Array + ?Sized> DowncastArrayHelper for T {
156        fn downcast_array_helper<U: Any>(&self) -> Result<&U> {
157            self.as_any().downcast_ref().ok_or_else(|| {
158                let actual_type = self.data_type();
159                let desired_type_name = type_name::<U>();
160                _internal_datafusion_err!(
161                    "could not cast array of type {} to {}",
162                    actual_type,
163                    desired_type_name
164                )
165            })
166        }
167    }
168}
169
#[cfg(test)]
mod tests {
    use arrow::array::{ArrayRef, Int32Array, UInt64Array};
    use std::any::{type_name, type_name_of_val};
    use std::sync::Arc;

    /// A successful downcast yields a `&Int32Array` holding the original values.
    #[test]
    fn test_downcast_value() -> crate::Result<()> {
        let boxed: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let array = downcast_value!(&boxed, Int32Array);
        assert_eq!(type_name_of_val(&array), type_name::<&Int32Array>());

        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(array, &expected);
        Ok(())
    }

    /// A failed downcast reports both the actual data type and the requested
    /// array type in the error message.
    #[test]
    fn test_downcast_value_err_message() {
        let boxed: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
        // The macro uses `?`, so it is wrapped in a closure to capture the error.
        let error: crate::DataFusionError = (|| {
            downcast_value!(&boxed, UInt64Array);
            Ok(())
        })()
        .err()
        .unwrap();

        // The exact output of `type_name` is not guaranteed to be stable across
        // compiler versions (and changes if the type moves between modules), so
        // derive the expected type name the same way the error message does
        // instead of hard-coding it.
        let expected_prefix = format!(
            "Internal error: could not cast array of type Int32 to {}",
            type_name::<UInt64Array>()
        );
        assert_starts_with(error.to_string(), expected_prefix);
    }

    // `err.to_string()` depends on backtrace being present (may have backtrace appended)
    // `err.strip_backtrace()` also depends on backtrace being present (may have "This was likely caused by ..." stripped)
    /// Asserts that `actual` begins with `expected_prefix`; only the prefix is
    /// compared because extra text may follow the message.
    fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
        let actual = actual.as_ref();
        let expected_prefix = expected_prefix.as_ref();
        assert!(
            actual.starts_with(expected_prefix),
            "Expected '{actual}' to start with '{expected_prefix}'"
        );
    }
}