Skip to main content

datafusion_common/
lib.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#![doc(
19    html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg",
20    html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg"
21)]
22#![cfg_attr(docsrs, feature(doc_cfg))]
23// Make sure fast / cheap clones on Arc are explicit:
24// https://github.com/apache/datafusion/issues/11143
25#![deny(clippy::clone_on_ref_ptr)]
26#![cfg_attr(test, allow(clippy::needless_pass_by_value))]
27
28mod column;
29mod dfschema;
30mod functional_dependencies;
31mod join_type;
32mod param_value;
33mod schema_reference;
34mod table_reference;
35mod unnest;
36
37pub mod alias;
38pub mod cast;
39pub mod config;
40pub mod cse;
41pub mod datatype;
42pub mod diagnostic;
43pub mod display;
44pub mod encryption;
45pub mod error;
46pub mod file_options;
47pub mod format;
48pub mod hash_utils;
49pub mod instant;
50pub mod metadata;
51pub mod nested_struct;
52mod null_equality;
53pub mod parquet_config;
54pub mod parsers;
55pub mod pruning;
56pub mod rounding;
57pub mod scalar;
58pub mod spans;
59pub mod stats;
60pub mod test_util;
61pub mod tree_node;
62pub mod types;
63pub mod utils;
64/// Reexport arrow crate
65pub use arrow;
66pub use column::Column;
67pub use dfschema::{
68    DFSchema, DFSchemaRef, ExprSchema, SchemaExt, ToDFSchema, qualified_name,
69};
70pub use diagnostic::Diagnostic;
71pub use display::human_readable::{
72    human_readable_count, human_readable_duration, human_readable_size, units,
73};
74pub use error::{
75    DataFusionError, Result, SchemaError, SharedResult, field_not_found,
76    unqualified_field_not_found,
77};
78pub use file_options::file_type::{
79    DEFAULT_ARROW_EXTENSION, DEFAULT_AVRO_EXTENSION, DEFAULT_CSV_EXTENSION,
80    DEFAULT_JSON_EXTENSION, DEFAULT_PARQUET_EXTENSION, GetExt,
81};
82pub use functional_dependencies::{
83    Constraint, Constraints, Dependency, FunctionalDependence, FunctionalDependencies,
84    aggregate_functional_dependencies, get_required_group_by_exprs_indices,
85    get_target_functional_dependencies,
86};
87use hashbrown::DefaultHashBuilder;
88pub use join_type::{JoinConstraint, JoinSide, JoinType};
89pub use nested_struct::cast_column;
90pub use null_equality::NullEquality;
91pub use param_value::ParamValues;
92pub use scalar::{ScalarType, ScalarValue};
93pub use schema_reference::SchemaReference;
94pub use spans::{Location, Span, Spans};
95pub use stats::{ColumnStatistics, Statistics};
96pub use table_reference::{ResolvedTableReference, TableReference};
97pub use unnest::{RecursionUnnestOption, UnnestOptions};
98pub use utils::project_schema;
99
100// These are hidden from docs purely to avoid polluting the public view of what this crate exports.
101// These are just re-exports of macros by the same name, which gets around the 'cannot refer to
102// macro-expanded macro_export macros by their full path' error.
103// The design to get around this comes from this comment:
104// https://github.com/rust-lang/rust/pull/52234#issuecomment-976702997
105#[doc(hidden)]
106pub use error::{
107    _config_datafusion_err, _exec_datafusion_err, _ffi_datafusion_err,
108    _internal_datafusion_err, _not_impl_datafusion_err, _plan_datafusion_err,
109    _resources_datafusion_err, _substrait_datafusion_err,
110};
111
112// The HashMap and HashSet implementations that should be used as the uniform defaults
113pub type HashMap<K, V, S = DefaultHashBuilder> = hashbrown::HashMap<K, V, S>;
114pub type HashSet<T, S = DefaultHashBuilder> = hashbrown::HashSet<T, S>;
115pub mod hash_map {
116    pub use hashbrown::hash_map::Entry;
117}
118pub mod hash_set {
119    pub use hashbrown::hash_set::Entry;
120}
121
/// Downcast an Arrow Array to a concrete type, returning a `DataFusionError::Internal` if the
/// cast is not possible. In normal usage of DataFusion the downcast should always succeed.
///
/// The macro evaluates to a reference to the concrete array. On a type mismatch it returns
/// early from the enclosing function or closure via `?`, so it can only be used where `?`
/// can propagate the error.
///
/// The target type may be written as any type expression (a bare name, a full path, or a
/// generic instantiation). The older two-token form `Type, Param`, which expands to
/// `Type<Param>`, is still accepted. A trailing comma is allowed in both forms.
///
/// Examples:
///
/// * `let array = downcast_value!(values, Int32Array)`
/// * `let array = downcast_value!(values, arrow::array::Int32Array)`
/// * `let array = downcast_value!(values, PrimitiveArray<Int32Type>)`
/// * `let array = downcast_value!(values, PrimitiveArray, Int32Type)`
#[macro_export]
macro_rules! downcast_value {
    // General form: the target is an arbitrary type.
    ($Value: expr, $Type: ty $(,)?) => {{
        // Imported anonymously: the trait's method becomes callable without adding the name
        // `DowncastArrayHelper` to the scope in which `$Value` is evaluated.
        use $crate::__private::DowncastArrayHelper as _;
        $Value.downcast_array_helper::<$Type>()?
    }};
    // Legacy form: generic type name and its single parameter passed as separate arguments.
    ($Value: expr, $Type: ident, $T: tt $(,)?) => {{
        use $crate::__private::DowncastArrayHelper as _;
        $Value.downcast_array_helper::<$Type<$T>>()?
    }};
}
137
138// Not public API.
139#[doc(hidden)]
140pub mod __private {
141    use crate::Result;
142    use crate::error::_internal_datafusion_err;
143    use arrow::array::Array;
144    use std::any::{Any, type_name};
145
146    #[doc(hidden)]
147    pub trait DowncastArrayHelper {
148        fn downcast_array_helper<U: Any>(&self) -> Result<&U>;
149    }
150
151    impl<T: Array + ?Sized> DowncastArrayHelper for T {
152        fn downcast_array_helper<U: Any>(&self) -> Result<&U> {
153            self.as_any().downcast_ref().ok_or_else(|| {
154                let actual_type = self.data_type();
155                let desired_type_name = type_name::<U>();
156                _internal_datafusion_err!(
157                    "could not cast array of type {} to {}",
158                    actual_type,
159                    desired_type_name
160                )
161            })
162        }
163    }
164}
165
#[cfg(test)]
mod tests {
    use arrow::array::{ArrayRef, Int32Array, UInt64Array};
    use std::any::{type_name, type_name_of_val};
    use std::sync::Arc;

    /// Type-erased `Int32` array holding `[1, 2, 3]`, shared by the tests below.
    fn int32_fixture() -> ArrayRef {
        Arc::new(Int32Array::from(vec![1, 2, 3]))
    }

    /// A successful downcast yields a `&Int32Array` with the original contents.
    #[test]
    fn test_downcast_value() -> crate::Result<()> {
        let erased = int32_fixture();
        let typed = downcast_value!(&erased, Int32Array);
        assert_eq!(type_name_of_val(&typed), type_name::<&Int32Array>());

        let expected: Int32Array = (1..=3).map(Some).collect();
        assert_eq!(typed, &expected);
        Ok(())
    }

    /// A mismatched downcast reports both the actual data type and the requested Rust type.
    #[test]
    fn test_downcast_value_err_message() {
        let erased = int32_fixture();
        // The macro uses `?`, so the failing cast has to run inside something that
        // returns a `Result`.
        let failing_cast = || -> crate::Result<()> {
            downcast_value!(&erased, UInt64Array);
            Ok(())
        };
        let error: crate::DataFusionError = failing_cast().err().unwrap();

        assert_starts_with(
            error.to_string(),
            "Internal error: could not cast array of type Int32 to arrow_array::array::primitive_array::PrimitiveArray<arrow_array::types::UInt64Type>",
        );
    }

    // Only the prefix is compared because the rest of the rendered error varies:
    // `err.to_string()` depends on backtrace being present (may have backtrace appended)
    // `err.strip_backtrace()` also depends on backtrace being present (may have "This was likely caused by ..." stripped)
    fn assert_starts_with(actual: impl AsRef<str>, expected_prefix: impl AsRef<str>) {
        let (actual, expected_prefix) = (actual.as_ref(), expected_prefix.as_ref());
        let has_prefix = actual.starts_with(expected_prefix);
        assert!(
            has_prefix,
            "Expected '{actual}' to start with '{expected_prefix}'"
        );
    }
}