exon_cram/
config.rs

1// Copyright 2024 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::datatypes::SchemaRef;
18use exon_common::DEFAULT_BATCH_SIZE;
19use object_store::ObjectStore;
20
/// Configuration for a CRAM datasource.
///
/// Bundles the object store, the file schema, and the read options
/// (batch size, optional column projection, optional FASTA reference)
/// into a single value.
#[derive(Debug)]
pub struct CRAMConfig {
    /// The number of records to read at a time.
    pub batch_size: usize,
    /// The object store to use.
    pub object_store: Arc<dyn ObjectStore>,
    /// The file schema to use (the full, unprojected schema).
    pub file_schema: Arc<arrow::datatypes::Schema>,
    /// Any projections to apply to the resulting batches, expressed as
    /// column indices into `file_schema`. `None` means no projection is set.
    pub projection: Option<Vec<usize>>,
    /// The FASTA reference to use, if any.
    // NOTE(review): presumably a path or URI locating the reference
    // sequence; the expected format is not visible here, so confirm it
    // against the code that consumes this field.
    pub fasta_reference: Option<String>,
}
35
36impl CRAMConfig {
37    /// Create a new VCF configuration.
38    pub fn new(
39        object_store: Arc<dyn ObjectStore>,
40        file_schema: SchemaRef,
41        fasta_reference: Option<String>,
42    ) -> Self {
43        Self {
44            batch_size: DEFAULT_BATCH_SIZE,
45            object_store,
46            file_schema,
47            projection: None,
48            fasta_reference,
49        }
50    }
51
52    /// Get the projection, returning the identity projection if none is set.
53    pub fn projection(&self) -> Vec<usize> {
54        self.projection
55            .clone()
56            .unwrap_or_else(|| (0..self.file_schema.fields().len()).collect())
57    }
58
59    /// Set the batch size.
60    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
61        self.batch_size = batch_size;
62        self
63    }
64
65    /// Set the projection.
66    pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
67        self.projection = Some(projection);
68        self
69    }
70
71    /// Get the projected schema.
72    pub fn projected_schema(&self) -> SchemaRef {
73        match &self.projection {
74            Some(p) => Arc::new(self.file_schema.project(p).unwrap()),
75            None => self.file_schema.clone(),
76        }
77    }
78}