datafusion_orc/
file_source.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::physical_exec::OrcOpener;
19use arrow::datatypes::SchemaRef;
20use datafusion::common::Statistics;
21use datafusion::datasource::physical_plan::{FileOpener, FileScanConfig, FileSource};
22use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
23use object_store::ObjectStore;
24use std::any::Any;
25use std::sync::Arc;
26
27#[derive(Debug, Clone)]
28pub struct OrcSource {
29    metrics: ExecutionPlanMetricsSet,
30    statistics: Statistics,
31    batch_size: usize,
32}
33
34impl Default for OrcSource {
35    fn default() -> Self {
36        Self {
37            metrics: ExecutionPlanMetricsSet::default(),
38            statistics: Statistics::default(),
39            batch_size: 1024,
40        }
41    }
42}
43
44impl FileSource for OrcSource {
45    fn create_file_opener(
46        &self,
47        object_store: Arc<dyn ObjectStore>,
48        config: &FileScanConfig,
49        _partition: usize,
50    ) -> Arc<dyn FileOpener> {
51        Arc::new(OrcOpener::new(object_store, config, self.batch_size))
52    }
53
54    fn as_any(&self) -> &dyn Any {
55        self
56    }
57
58    fn with_batch_size(&self, batch_size: usize) -> Arc<dyn FileSource> {
59        Arc::new(Self {
60            batch_size,
61            ..self.clone()
62        })
63    }
64
65    fn with_schema(&self, _schema: SchemaRef) -> Arc<dyn FileSource> {
66        Arc::new(self.clone())
67    }
68
69    fn with_projection(&self, _config: &FileScanConfig) -> Arc<dyn FileSource> {
70        Arc::new(self.clone())
71    }
72
73    fn with_statistics(&self, statistics: Statistics) -> Arc<dyn FileSource> {
74        Arc::new(Self {
75            statistics,
76            ..self.clone()
77        })
78    }
79
80    fn metrics(&self) -> &ExecutionPlanMetricsSet {
81        &self.metrics
82    }
83
84    fn statistics(&self) -> datafusion::common::Result<Statistics> {
85        Ok(self.statistics.clone())
86    }
87
88    fn file_type(&self) -> &str {
89        "orc"
90    }
91}