1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. //! Data source traits use std::any::Any; use std::sync::Arc; use crate::arrow::datatypes::SchemaRef; use crate::error::Result; use crate::logical_plan::Expr; use crate::physical_plan::ExecutionPlan; /// This table statistics are estimates. /// It can not be used directly in the precise compute #[derive(Debug, Clone, Default)] pub struct Statistics { /// The number of table rows pub num_rows: Option<usize>, /// total byte of the table rows pub total_byte_size: Option<usize>, /// Statistics on a column level pub column_statistics: Option<Vec<ColumnStatistics>>, } /// This table statistics are estimates about column #[derive(Clone, Debug, PartialEq)] pub struct ColumnStatistics { /// Number of null values on column pub null_count: Option<usize>, } /// Indicates whether and how a filter expression can be handled by a /// TableProvider for table scans. #[derive(Debug, Clone)] pub enum TableProviderFilterPushDown { /// The expression cannot be used by the provider. Unsupported, /// The expression can be used to help minimise the data retrieved, /// but the provider cannot guarantee that all returned tuples /// satisfy the filter. The Filter plan node containing this expression /// will be preserved. Inexact, /// The provider guarantees that all returned data satisfies this /// filter expression. The Filter plan node containing this expression /// will be removed. Exact, } /// Source table pub trait TableProvider { /// Returns the table provider as [`Any`](std::any::Any) so that it can be /// downcast to a specific implementation. fn as_any(&self) -> &dyn Any; /// Get a reference to the schema for this table fn schema(&self) -> SchemaRef; /// Create an ExecutionPlan that will scan the table. fn scan( &self, projection: &Option<Vec<usize>>, batch_size: usize, filters: &[Expr], ) -> Result<Arc<dyn ExecutionPlan>>; /// Returns the table Statistics /// Statistics should be optional because not all data sources can provide statistics. fn statistics(&self) -> Statistics; /// Tests whether the table provider can make use of a filter expression /// to optimise data retrieval. fn supports_filter_pushdown( &self, _filter: &Expr, ) -> Result<TableProviderFilterPushDown> { Ok(TableProviderFilterPushDown::Unsupported) } }