exon_sam/config.rs
1// Copyright 2023 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::datatypes::SchemaRef;
18use arrow::error::Result;
19use exon_common::DEFAULT_BATCH_SIZE;
20use object_store::ObjectStore;
21
22/// Configuration for a SAM datasource.
23pub struct SAMConfig {
24 /// The number of rows to read at a time.
25 pub batch_size: usize,
26
27 /// The schema of the SAM file.
28 pub file_schema: SchemaRef,
29
30 /// The object store to use for reading SAM files.
31 pub object_store: Arc<dyn ObjectStore>,
32
33 /// Any projections to apply to the resulting batches.
34 pub projection: Option<Vec<usize>>,
35}
36
37impl SAMConfig {
38 /// Create a new SAM configuration.
39 pub fn new(object_store: Arc<dyn ObjectStore>, file_schema: SchemaRef) -> Self {
40 Self {
41 batch_size: DEFAULT_BATCH_SIZE,
42 file_schema,
43 object_store,
44 projection: None,
45 }
46 }
47
48 /// Set the batch size.
49 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
50 self.batch_size = batch_size;
51 self
52 }
53
54 /// Set the projection.
55 pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
56 self.projection = Some(projection);
57 self
58 }
59
60 /// Get the projection, returning the identity projection if none is set.
61 pub fn projection(&self) -> Vec<usize> {
62 self.projection
63 .clone()
64 .unwrap_or_else(|| (0..self.file_schema.fields().len()).collect())
65 }
66
67 /// Get the projected schema.
68 pub fn projected_schema(&self) -> Result<SchemaRef> {
69 let schema = self.file_schema.project(&self.projection())?;
70
71 Ok(Arc::new(schema))
72 }
73}