exon_vcf/config.rs
1// Copyright 2023 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::datatypes::SchemaRef;
18use exon_common::DEFAULT_BATCH_SIZE;
19use object_store::ObjectStore;
20
21/// Configuration for a VCF datasource.
22#[derive(Debug)]
23pub struct VCFConfig {
24 /// The number of records to read at a time.
25 pub batch_size: usize,
26 /// The object store to use.
27 pub object_store: Arc<dyn ObjectStore>,
28 /// The file schema to use.
29 pub file_schema: Arc<arrow::datatypes::Schema>,
30 /// Any projections to apply to the resulting batches.
31 pub projection: Option<Vec<usize>>,
32}
33
34impl VCFConfig {
35 /// Create a new VCF configuration.
36 pub fn new(object_store: Arc<dyn ObjectStore>, file_schema: SchemaRef) -> Self {
37 Self {
38 batch_size: DEFAULT_BATCH_SIZE,
39 object_store,
40 file_schema,
41 projection: None,
42 }
43 }
44
45 /// Set the batch size.
46 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
47 self.batch_size = batch_size;
48 self
49 }
50
51 /// Set the projection.
52 pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
53 self.projection = Some(projection);
54 self
55 }
56
57 /// Get the projected schema.
58 pub fn projected_schema(&self) -> SchemaRef {
59 match &self.projection {
60 Some(p) => Arc::new(self.file_schema.project(p).unwrap()),
61 None => self.file_schema.clone(),
62 }
63 }
64}