datafusion_data_access/object_store/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Object Store abstracts access to an underlying file/object storage.
19
20pub mod local;
21
22use std::fmt::Debug;
23use std::io::Read;
24use std::pin::Pin;
25use std::sync::Arc;
26
27use async_trait::async_trait;
28use futures::{AsyncRead, Stream};
29
30use crate::{FileMeta, ListEntry, Result, SizedFile};
31
32/// Stream of files listed from object store
33pub type FileMetaStream =
34 Pin<Box<dyn Stream<Item = Result<FileMeta>> + Send + Sync + 'static>>;
35
36/// Stream of list entries obtained from object store
37pub type ListEntryStream =
38 Pin<Box<dyn Stream<Item = Result<ListEntry>> + Send + Sync + 'static>>;
39
40/// Stream readers opened on a given object store
41pub type ObjectReaderStream =
42 Pin<Box<dyn Stream<Item = Result<Arc<dyn ObjectReader>>> + Send + Sync>>;
43
44/// Object Reader for one file in an object store.
45///
46/// Note that the dynamic dispatch on the reader might
47/// have some performance impacts.
48#[async_trait]
49pub trait ObjectReader: Send + Sync {
50 /// Get reader for a part [start, start + length] in the file asynchronously
51 async fn chunk_reader(&self, start: u64, length: usize)
52 -> Result<Box<dyn AsyncRead>>;
53
54 /// Get reader for a part [start, start + length] in the file
55 fn sync_chunk_reader(
56 &self,
57 start: u64,
58 length: usize,
59 ) -> Result<Box<dyn Read + Send + Sync>>;
60
61 /// Get reader for the entire file
62 fn sync_reader(&self) -> Result<Box<dyn Read + Send + Sync>> {
63 self.sync_chunk_reader(0, self.length() as usize)
64 }
65
66 /// Get the size of the file
67 fn length(&self) -> u64;
68}
69
70/// A ObjectStore abstracts access to an underlying file/object storage.
71/// It maps strings (e.g. URLs, filesystem paths, etc) to sources of bytes
72#[async_trait]
73pub trait ObjectStore: Sync + Send + Debug {
74 /// Returns all the files in path `prefix`
75 async fn list_file(&self, prefix: &str) -> Result<FileMetaStream>;
76
77 /// Returns all the files in `prefix` if the `prefix` is already a leaf dir,
78 /// or all paths between the `prefix` and the first occurrence of the `delimiter` if it is provided.
79 async fn list_dir(
80 &self,
81 prefix: &str,
82 delimiter: Option<String>,
83 ) -> Result<ListEntryStream>;
84
85 /// Get object reader for one file
86 fn file_reader(&self, file: SizedFile) -> Result<Arc<dyn ObjectReader>>;
87}