exon_cram/config.rs
1// Copyright 2024 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::datatypes::SchemaRef;
18use exon_common::DEFAULT_BATCH_SIZE;
19use object_store::ObjectStore;
20
21/// Configuration for a CRAM datasource.
22#[derive(Debug)]
23pub struct CRAMConfig {
24 /// The number of records to read at a time.
25 pub batch_size: usize,
26 /// The object store to use.
27 pub object_store: Arc<dyn ObjectStore>,
28 /// The file schema to use.
29 pub file_schema: Arc<arrow::datatypes::Schema>,
30 /// Any projections to apply to the resulting batches.
31 pub projection: Option<Vec<usize>>,
32 /// The FASTA reference to use.
33 pub fasta_reference: Option<String>,
34}
35
36impl CRAMConfig {
37 /// Create a new VCF configuration.
38 pub fn new(
39 object_store: Arc<dyn ObjectStore>,
40 file_schema: SchemaRef,
41 fasta_reference: Option<String>,
42 ) -> Self {
43 Self {
44 batch_size: DEFAULT_BATCH_SIZE,
45 object_store,
46 file_schema,
47 projection: None,
48 fasta_reference,
49 }
50 }
51
52 /// Get the projection, returning the identity projection if none is set.
53 pub fn projection(&self) -> Vec<usize> {
54 self.projection
55 .clone()
56 .unwrap_or_else(|| (0..self.file_schema.fields().len()).collect())
57 }
58
59 /// Set the batch size.
60 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
61 self.batch_size = batch_size;
62 self
63 }
64
65 /// Set the projection.
66 pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
67 self.projection = Some(projection);
68 self
69 }
70
71 /// Get the projected schema.
72 pub fn projected_schema(&self) -> SchemaRef {
73 match &self.projection {
74 Some(p) => Arc::new(self.file_schema.project(p).unwrap()),
75 None => self.file_schema.clone(),
76 }
77 }
78}