// exon_bigwig/zoom_batch_reader/config.rs

use std::sync::Arc;

use arrow::{
    datatypes::{DataType, Field, Schema, SchemaRef},
    error::Result as ArrowResult,
};
use exon_common::{TableSchema, DEFAULT_BATCH_SIZE};
use noodles::core::Region;
use object_store::ObjectStore;
24
25pub struct SchemaBuilder {
26 file_fields: Vec<Field>,
27 partition_fields: Vec<Field>,
28}
29
30impl Default for SchemaBuilder {
31 fn default() -> Self {
32 let file_fields = vec![
33 Field::new("name", DataType::Utf8, false),
34 Field::new("start", DataType::Int32, false),
35 Field::new("end", DataType::Int32, false),
36 Field::new("total_items", DataType::Int32, false),
37 Field::new("bases_covered", DataType::Int32, false),
38 Field::new("max_value", DataType::Float64, false),
39 Field::new("min_value", DataType::Float64, false),
40 Field::new("sum_squares", DataType::Float64, false),
41 Field::new("sum", DataType::Float64, false),
42 ];
43
44 Self {
45 file_fields,
46 partition_fields: vec![],
47 }
48 }
49}
50
51impl SchemaBuilder {
52 pub fn new(file_fields: Vec<Field>, partition_fields: Vec<Field>) -> Self {
53 Self {
54 file_fields,
55 partition_fields,
56 }
57 }
58
59 pub fn add_partition_fields(&mut self, fields: Vec<Field>) {
60 self.partition_fields.extend(fields);
61 }
62
63 pub fn build(self) -> TableSchema {
65 let mut fields = self.file_fields.clone();
66 fields.extend_from_slice(&self.partition_fields);
67
68 let schema = Schema::new(fields);
69
70 let projection = (0..self.file_fields.len()).collect::<Vec<_>>();
71
72 TableSchema::new(Arc::new(schema), projection)
73 }
74}
75
76#[derive(Debug)]
78pub struct BigWigZoomConfig {
79 pub batch_size: usize,
81
82 pub file_schema: SchemaRef,
84
85 pub object_store: Arc<dyn ObjectStore>,
87
88 pub projection: Option<Vec<usize>>,
90
91 pub interval: Option<Region>,
93
94 pub reduction_level: u32,
96}
97
98impl BigWigZoomConfig {
99 pub fn new_with_schema(object_store: Arc<dyn ObjectStore>, file_schema: SchemaRef) -> Self {
101 Self {
102 batch_size: DEFAULT_BATCH_SIZE,
103 object_store,
104 file_schema,
105 projection: None,
106 interval: None,
107 reduction_level: 400,
108 }
109 }
110
111 pub fn new(object_store: Arc<dyn ObjectStore>) -> ArrowResult<Self> {
112 let schema = SchemaBuilder::default().build();
113 let file_schema = schema.file_schema()?;
114
115 Ok(Self::new_with_schema(object_store, file_schema))
116 }
117
118 pub fn reduction_level(&self) -> u32 {
120 self.reduction_level
121 }
122
123 pub fn interval(&self) -> Option<&Region> {
125 self.interval.as_ref()
126 }
127
128 pub fn with_reduction_level(mut self, reduction_level: u32) -> Self {
130 self.reduction_level = reduction_level;
131 self
132 }
133
134 pub fn with_interval(mut self, interval: Region) -> Self {
136 self.interval = Some(interval);
137 self
138 }
139
140 pub fn with_some_interval(mut self, interval: Option<Region>) -> Self {
142 self.interval = interval;
143 self
144 }
145
146 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
148 self.batch_size = batch_size;
149 self
150 }
151
152 pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
154 self.projection = Some(projection);
155 self
156 }
157
158 pub fn with_some_projection(mut self, projection: Option<Vec<usize>>) -> Self {
160 self.projection = projection;
161 self
162 }
163}