exon_vcf/
config.rs

1// Copyright 2023 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::datatypes::SchemaRef;
18use exon_common::DEFAULT_BATCH_SIZE;
19use object_store::ObjectStore;
20
21/// Configuration for a VCF datasource.
22#[derive(Debug)]
23pub struct VCFConfig {
24    /// The number of records to read at a time.
25    pub batch_size: usize,
26    /// The object store to use.
27    pub object_store: Arc<dyn ObjectStore>,
28    /// The file schema to use.
29    pub file_schema: Arc<arrow::datatypes::Schema>,
30    /// Any projections to apply to the resulting batches.
31    pub projection: Option<Vec<usize>>,
32}
33
34impl VCFConfig {
35    /// Create a new VCF configuration.
36    pub fn new(object_store: Arc<dyn ObjectStore>, file_schema: SchemaRef) -> Self {
37        Self {
38            batch_size: DEFAULT_BATCH_SIZE,
39            object_store,
40            file_schema,
41            projection: None,
42        }
43    }
44
45    /// Set the batch size.
46    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
47        self.batch_size = batch_size;
48        self
49    }
50
51    /// Set the projection.
52    pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
53        self.projection = Some(projection);
54        self
55    }
56
57    /// Get the projected schema.
58    pub fn projected_schema(&self) -> SchemaRef {
59        match &self.projection {
60            Some(p) => Arc::new(self.file_schema.project(p).unwrap()),
61            None => self.file_schema.clone(),
62        }
63    }
64}