exon_bam/
config.rs

1// Copyright 2023 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::datatypes::SchemaRef;
18use object_store::ObjectStore;
19
20/// The configuration for the BAM data source.
21pub struct BAMConfig {
22    /// The number of rows to read at a time from the object store.
23    pub batch_size: usize,
24
25    /// The schema of the BAM file.
26    pub file_schema: SchemaRef,
27
28    /// The object store to use for reading BAM files.
29    pub object_store: Arc<dyn ObjectStore>,
30
31    /// Any projections to apply to the resulting batches.
32    pub projection: Option<Vec<usize>>,
33}
34
35impl BAMConfig {
36    /// Create a new BAM configuration.
37    pub fn new(object_store: Arc<dyn ObjectStore>, file_schema: SchemaRef) -> Self {
38        Self {
39            object_store,
40            file_schema,
41            batch_size: 8096,
42            projection: None,
43        }
44    }
45
46    /// Set the batch size.
47    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
48        self.batch_size = batch_size;
49        self
50    }
51
52    /// Set the projection.
53    pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
54        self.projection = Some(projection);
55        self
56    }
57
58    /// Set the projection from an optional vector.
59    pub fn with_some_projection(mut self, projection: Option<Vec<usize>>) -> Self {
60        self.projection = projection;
61        self
62    }
63
64    /// Get the projection, returning the identity projection if none is set.
65    pub fn projection(&self) -> Vec<usize> {
66        self.projection
67            .clone()
68            .unwrap_or_else(|| (0..self.file_schema.fields().len()).collect())
69    }
70
71    /// Get the projected schema.
72    pub fn projected_schema(&self) -> arrow::error::Result<SchemaRef> {
73        let schema = self.file_schema.project(&self.projection())?;
74
75        Ok(Arc::new(schema))
76    }
77}