datafusion_common/
lib.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#![doc(
19    html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg",
20    html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg"
21)]
22#![cfg_attr(docsrs, feature(doc_cfg))]
23// Make sure fast / cheap clones on Arc are explicit:
24// https://github.com/apache/datafusion/issues/11143
25#![deny(clippy::clone_on_ref_ptr)]
26// https://github.com/apache/datafusion/issues/18503
27#![deny(clippy::needless_pass_by_value)]
28#![cfg_attr(test, allow(clippy::needless_pass_by_value))]
29
30mod column;
31mod dfschema;
32mod functional_dependencies;
33mod join_type;
34mod param_value;
35#[cfg(feature = "pyarrow")]
36mod pyarrow;
37mod schema_reference;
38mod table_reference;
39mod unnest;
40
41pub mod alias;
42pub mod cast;
43pub mod config;
44pub mod cse;
45pub mod datatype;
46pub mod diagnostic;
47pub mod display;
48pub mod encryption;
49pub mod error;
50pub mod file_options;
51pub mod format;
52pub mod hash_utils;
53pub mod instant;
54pub mod metadata;
55pub mod nested_struct;
56mod null_equality;
57pub mod parsers;
58pub mod pruning;
59pub mod rounding;
60pub mod scalar;
61pub mod spans;
62pub mod stats;
63pub mod test_util;
64pub mod tree_node;
65pub mod types;
66pub mod utils;
67
68/// Reexport arrow crate
69pub use arrow;
70pub use column::Column;
71pub use dfschema::{
72    qualified_name, DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema,
73};
74pub use diagnostic::Diagnostic;
75pub use error::{
76    field_not_found, unqualified_field_not_found, DataFusionError, Result, SchemaError,
77    SharedResult,
78};
79pub use file_options::file_type::{
80    GetExt, DEFAULT_ARROW_EXTENSION, DEFAULT_AVRO_EXTENSION, DEFAULT_CSV_EXTENSION,
81    DEFAULT_JSON_EXTENSION, DEFAULT_PARQUET_EXTENSION,
82};
83pub use functional_dependencies::{
84    aggregate_functional_dependencies, get_required_group_by_exprs_indices,
85    get_target_functional_dependencies, Constraint, Constraints, Dependency,
86    FunctionalDependence, FunctionalDependencies,
87};
88use hashbrown::hash_map::DefaultHashBuilder;
89pub use join_type::{JoinConstraint, JoinSide, JoinType};
90pub use nested_struct::cast_column;
91pub use null_equality::NullEquality;
92pub use param_value::ParamValues;
93pub use scalar::{ScalarType, ScalarValue};
94pub use schema_reference::SchemaReference;
95pub use spans::{Location, Span, Spans};
96pub use stats::{ColumnStatistics, Statistics};
97pub use table_reference::{ResolvedTableReference, TableReference};
98pub use unnest::{RecursionUnnestOption, UnnestOptions};
99pub use utils::project_schema;
100
101// These are hidden from docs purely to avoid polluting the public view of what this crate exports.
102// These are just re-exports of macros by the same name, which gets around the 'cannot refer to
103// macro-expanded macro_export macros by their full path' error.
104// The design to get around this comes from this comment:
105// https://github.com/rust-lang/rust/pull/52234#issuecomment-976702997
106#[doc(hidden)]
107pub use error::{
108    _config_datafusion_err, _exec_datafusion_err, _internal_datafusion_err,
109    _not_impl_datafusion_err, _plan_datafusion_err, _resources_datafusion_err,
110    _substrait_datafusion_err,
111};
112
113// The HashMap and HashSet implementations that should be used as the uniform defaults
/// Alias for `hashbrown::HashMap` whose hasher parameter `S` defaults to hashbrown's
/// `DefaultHashBuilder`.
114pub type HashMap<K, V, S = DefaultHashBuilder> = hashbrown::HashMap<K, V, S>;
/// Alias for `hashbrown::HashSet` whose hasher parameter `S` defaults to hashbrown's
/// `DefaultHashBuilder`.
115pub type HashSet<T, S = DefaultHashBuilder> = hashbrown::HashSet<T, S>;
/// Re-exports hashbrown's map `Entry` type so it is reachable as `hash_map::Entry`.
116pub mod hash_map {
117    pub use hashbrown::hash_map::Entry;
118}
/// Re-exports hashbrown's set `Entry` type so it is reachable as `hash_set::Entry`.
119pub mod hash_set {
120    pub use hashbrown::hash_set::Entry;
121}
122
/// Downcasts an Arrow array to a reference to a concrete array type.
///
/// The two-argument form targets `$array_type`; the three-argument form targets the generic
/// type `$array_type<$type_param>`. When the array is not of the requested type, the expansion
/// propagates a `DataFusionError::Internal` with `?`, so the macro must be invoked inside a
/// function or closure that returns a compatible `Result`. In normal usage of DataFusion the
/// downcast is expected to succeed.
///
/// Example: `let array = downcast_value!(values, Int32Array)`
#[macro_export]
macro_rules! downcast_value {
    ($array:expr, $array_type:ident) => {{
        // Bring the helper trait into scope only for the duration of this expansion.
        use $crate::__private::DowncastArrayHelper;
        $array.downcast_array_helper::<$array_type>()?
    }};
    ($array:expr, $array_type:ident, $type_param:tt) => {{
        // Bring the helper trait into scope only for the duration of this expansion.
        use $crate::__private::DowncastArrayHelper;
        $array.downcast_array_helper::<$array_type<$type_param>>()?
    }};
}
138
139// Not public API.
140#[doc(hidden)]
141pub mod __private {
142    use crate::error::_internal_datafusion_err;
143    use crate::Result;
144    use arrow::array::Array;
145    use std::any::{type_name, Any};
146
147    #[doc(hidden)]
148    pub trait DowncastArrayHelper {
149        fn downcast_array_helper<U: Any>(&self) -> Result<&U>;
150    }
151
152    impl<T: Array + ?Sized> DowncastArrayHelper for T {
153        fn downcast_array_helper<U: Any>(&self) -> Result<&U> {
154            self.as_any().downcast_ref().ok_or_else(|| {
155                let actual_type = self.data_type();
156                let desired_type_name = type_name::<U>();
157                _internal_datafusion_err!(
158                    "could not cast array of type {} to {}",
159                    actual_type,
160                    desired_type_name
161                )
162            })
163        }
164    }
165}
166
#[cfg(test)]
mod tests {
    use arrow::array::{ArrayRef, Int32Array, UInt64Array};
    use std::any::{type_name, type_name_of_val};
    use std::sync::Arc;

    /// A successful downcast yields a `&Int32Array` holding the original values.
    #[test]
    fn test_downcast_value() -> crate::Result<()> {
        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
        let downcast = downcast_value!(&values, Int32Array);
        assert_eq!(type_name_of_val(&downcast), type_name::<&Int32Array>());

        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(downcast, &expected);
        Ok(())
    }

    /// A failed downcast reports both the actual data type and the requested array type.
    #[test]
    fn test_downcast_value_err_message() {
        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
        // `downcast_value!` expands to an expression ending in `?`, so it has to run inside
        // something that returns a `Result`.
        let attempt = || -> crate::Result<()> {
            downcast_value!(&values, UInt64Array);
            Ok(())
        };
        let error: crate::DataFusionError = attempt().err().unwrap();

        assert_starts_with(
            error.to_string(),
            "Internal error: could not cast array of type Int32 to arrow_array::array::primitive_array::PrimitiveArray<arrow_array::types::UInt64Type>"
        );
    }

    // Compare only the prefix: `err.to_string()` may have a backtrace appended, and
    // `err.strip_backtrace()` likewise depends on whether a backtrace is present (it may strip
    // a trailing "This was likely caused by ..." part).
    fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
        let (actual, expected_prefix) = (actual.as_ref(), expected_prefix.as_ref());
        assert!(
            actual.starts_with(expected_prefix),
            "Expected '{actual}' to start with '{expected_prefix}'"
        );
    }
}