exon_bigwig/value_batch_reader/
config.rs1use std::sync::Arc;
16
17use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef};
18use exon_common::{TableSchema, DEFAULT_BATCH_SIZE};
19use noodles::core::Region;
20use object_store::ObjectStore;
21
22pub struct SchemaBuilder {
23 file_fields: Vec<Field>,
24 partition_fields: Vec<Field>,
25}
26
27impl Default for SchemaBuilder {
28 fn default() -> Self {
29 let file_fields = vec![
30 Field::new("name", DataType::Utf8, false),
31 Field::new("start", DataType::Int32, false),
32 Field::new("end", DataType::Int32, false),
33 Field::new("value", DataType::Float32, false),
34 ];
35
36 Self {
37 file_fields,
38 partition_fields: vec![],
39 }
40 }
41}
42
43impl SchemaBuilder {
44 pub fn new(file_fields: Vec<Field>, partition_fields: Vec<Field>) -> Self {
45 Self {
46 file_fields,
47 partition_fields,
48 }
49 }
50
51 pub fn add_partition_fields(&mut self, fields: Vec<Field>) {
52 self.partition_fields.extend(fields);
53 }
54
55 pub fn build(self) -> TableSchema {
57 let mut fields = self.file_fields.clone();
58 fields.extend_from_slice(&self.partition_fields);
59
60 let schema = Schema::new(fields);
61
62 let projection = (0..self.file_fields.len()).collect::<Vec<_>>();
63
64 TableSchema::new(Arc::new(schema), projection)
65 }
66}
67
68#[derive(Debug)]
69pub enum ValueReadType {
70 Interval(Region),
71 Scan,
72}
73
74#[derive(Debug)]
76pub struct BigWigValueConfig {
77 pub batch_size: usize,
79
80 pub file_schema: SchemaRef,
82
83 pub object_store: Arc<dyn ObjectStore>,
85
86 pub projection: Option<Vec<usize>>,
88
89 pub read_type: ValueReadType,
91}
92
93impl BigWigValueConfig {
94 pub fn new(object_store: Arc<dyn ObjectStore>) -> Self {
96 let file_schema = Schema::new(Fields::from_iter(vec![
97 Field::new("chrom", DataType::Utf8, false),
98 Field::new("start", DataType::Int32, false),
99 Field::new("end", DataType::Int32, false),
100 Field::new("value", DataType::Float32, false),
101 ]));
102
103 Self {
104 batch_size: DEFAULT_BATCH_SIZE,
105 object_store,
106 file_schema: Arc::new(file_schema),
107 projection: None,
108 read_type: ValueReadType::Scan,
109 }
110 }
111
112 pub fn new_with_schema(object_store: Arc<dyn ObjectStore>, file_schema: SchemaRef) -> Self {
114 Self {
115 batch_size: DEFAULT_BATCH_SIZE,
116 object_store,
117 file_schema,
118 projection: None,
119 read_type: ValueReadType::Scan,
120 }
121 }
122
123 pub fn with_some_interval(mut self, interval: Option<Region>) -> Self {
125 if let Some(interval) = interval {
126 self.read_type = ValueReadType::Interval(interval);
127 } else {
128 self.read_type = ValueReadType::Scan;
129 }
130
131 self
132 }
133
134 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
136 self.batch_size = batch_size;
137 self
138 }
139
140 pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
142 self.projection = Some(projection);
143 self
144 }
145
146 pub fn with_some_projection(mut self, projection: Option<Vec<usize>>) -> Self {
148 self.projection = projection;
149 self
150 }
151}