exon_bam/config.rs
1// Copyright 2023 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::datatypes::SchemaRef;
18use object_store::ObjectStore;
19
20/// The configuration for the BAM data source.
21pub struct BAMConfig {
22 /// The number of rows to read at a time from the object store.
23 pub batch_size: usize,
24
25 /// The schema of the BAM file.
26 pub file_schema: SchemaRef,
27
28 /// The object store to use for reading BAM files.
29 pub object_store: Arc<dyn ObjectStore>,
30
31 /// Any projections to apply to the resulting batches.
32 pub projection: Option<Vec<usize>>,
33}
34
35impl BAMConfig {
36 /// Create a new BAM configuration.
37 pub fn new(object_store: Arc<dyn ObjectStore>, file_schema: SchemaRef) -> Self {
38 Self {
39 object_store,
40 file_schema,
41 batch_size: 8096,
42 projection: None,
43 }
44 }
45
46 /// Set the batch size.
47 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
48 self.batch_size = batch_size;
49 self
50 }
51
52 /// Set the projection.
53 pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
54 self.projection = Some(projection);
55 self
56 }
57
58 /// Set the projection from an optional vector.
59 pub fn with_some_projection(mut self, projection: Option<Vec<usize>>) -> Self {
60 self.projection = projection;
61 self
62 }
63
64 /// Get the projection, returning the identity projection if none is set.
65 pub fn projection(&self) -> Vec<usize> {
66 self.projection
67 .clone()
68 .unwrap_or_else(|| (0..self.file_schema.fields().len()).collect())
69 }
70
71 /// Get the projected schema.
72 pub fn projected_schema(&self) -> arrow::error::Result<SchemaRef> {
73 let schema = self.file_schema.project(&self.projection())?;
74
75 Ok(Arc::new(schema))
76 }
77}