datafusion_data_access/object_store/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Object Store abstracts access to an underlying file/object storage.
19
20pub mod local;
21
22use std::fmt::Debug;
23use std::io::Read;
24use std::pin::Pin;
25use std::sync::Arc;
26
27use async_trait::async_trait;
28use futures::{AsyncRead, Stream};
29
30use crate::{FileMeta, ListEntry, Result, SizedFile};
31
32/// Stream of files listed from object store
33pub type FileMetaStream =
34    Pin<Box<dyn Stream<Item = Result<FileMeta>> + Send + Sync + 'static>>;
35
36/// Stream of list entries obtained from object store
37pub type ListEntryStream =
38    Pin<Box<dyn Stream<Item = Result<ListEntry>> + Send + Sync + 'static>>;
39
40/// Stream readers opened on a given object store
41pub type ObjectReaderStream =
42    Pin<Box<dyn Stream<Item = Result<Arc<dyn ObjectReader>>> + Send + Sync>>;
43
44/// Object Reader for one file in an object store.
45///
46/// Note that the dynamic dispatch on the reader might
47/// have some performance impacts.
48#[async_trait]
49pub trait ObjectReader: Send + Sync {
50    /// Get reader for a part [start, start + length] in the file asynchronously
51    async fn chunk_reader(&self, start: u64, length: usize)
52        -> Result<Box<dyn AsyncRead>>;
53
54    /// Get reader for a part [start, start + length] in the file
55    fn sync_chunk_reader(
56        &self,
57        start: u64,
58        length: usize,
59    ) -> Result<Box<dyn Read + Send + Sync>>;
60
61    /// Get reader for the entire file
62    fn sync_reader(&self) -> Result<Box<dyn Read + Send + Sync>> {
63        self.sync_chunk_reader(0, self.length() as usize)
64    }
65
66    /// Get the size of the file
67    fn length(&self) -> u64;
68}
69
70/// A ObjectStore abstracts access to an underlying file/object storage.
71/// It maps strings (e.g. URLs, filesystem paths, etc) to sources of bytes
72#[async_trait]
73pub trait ObjectStore: Sync + Send + Debug {
74    /// Returns all the files in path `prefix`
75    async fn list_file(&self, prefix: &str) -> Result<FileMetaStream>;
76
77    /// Returns all the files in `prefix` if the `prefix` is already a leaf dir,
78    /// or all paths between the `prefix` and the first occurrence of the `delimiter` if it is provided.
79    async fn list_dir(
80        &self,
81        prefix: &str,
82        delimiter: Option<String>,
83    ) -> Result<ListEntryStream>;
84
85    /// Get object reader for one file
86    fn file_reader(&self, file: SizedFile) -> Result<Arc<dyn ObjectReader>>;
87}