exon_bed/
config.rs

1// Copyright 2024 WHERE TRUE Technologies.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::Arc;
16
17use arrow::datatypes::SchemaRef;
18use exon_common::DEFAULT_BATCH_SIZE;
19use object_store::ObjectStore;
20
21use crate::ExonBEDResult;
22
23/// Configuration for a BED datasource.
24#[derive(Debug)]
25pub struct BEDConfig {
26    /// The number of records to read at a time.
27    pub batch_size: usize,
28
29    /// The schema of the BED file.
30    pub file_schema: SchemaRef,
31
32    /// The object store to use.
33    pub object_store: Arc<dyn ObjectStore>,
34
35    /// Any projections to apply to the resulting batches.
36    pub projection: Option<Vec<usize>>,
37
38    /// The number of fields of the BED to read.
39    pub n_fields: Option<usize>,
40}
41
42impl BEDConfig {
43    /// Create a new BED configuration.
44    pub fn new(object_store: Arc<dyn ObjectStore>, file_schema: SchemaRef) -> Self {
45        Self {
46            batch_size: DEFAULT_BATCH_SIZE,
47            object_store,
48            file_schema,
49            projection: None,
50            n_fields: None,
51        }
52    }
53
54    /// Set the number of fields.
55    pub fn with_n_fields(mut self, n_fields: usize) -> Self {
56        self.n_fields = Some(n_fields);
57        self
58    }
59
60    /// Set the batch size.
61    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
62        self.batch_size = batch_size;
63        self
64    }
65
66    /// Set the projection.
67    pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
68        self.projection = Some(projection);
69        self
70    }
71
72    /// Set the projection from an optional vector.
73    pub fn with_some_projection(mut self, projection: Option<Vec<usize>>) -> Self {
74        self.projection = projection;
75        self
76    }
77
78    /// Get the projected schema.
79    pub fn projected_schema(&self) -> ExonBEDResult<SchemaRef> {
80        let schema = self.file_schema.project(&self.projection())?;
81
82        Ok(Arc::new(schema))
83    }
84
85    /// Return the projection, while accounting for the number of fields.
86    pub fn projection(&self) -> Vec<usize> {
87        match (&self.projection, &self.n_fields) {
88            (Some(projection), Some(n_fields)) => projection
89                .iter()
90                .filter(|&i| i < n_fields)
91                .copied()
92                .collect(),
93            (Some(projection), None) => projection.clone(),
94            (_, Some(n_fields)) => (0..*n_fields).collect(),
95            (_, _) => (0..self.file_schema.fields().len()).collect(),
96        }
97    }
98}