1use std::sync::Arc;
16
17use arrow::datatypes::{DataType, Field, SchemaRef};
18use exon_common::{TableSchemaBuilder, DEFAULT_BATCH_SIZE};
19use object_store::ObjectStore;
20
21pub fn new_gtf_schema_builder() -> TableSchemaBuilder {
22 let file_fields = file_fields();
23 TableSchemaBuilder::new_with_field_fields(file_fields)
24}
25
26fn file_fields() -> Vec<Field> {
28 let attribute_key_field = Field::new("keys", DataType::Utf8, false);
29 let attribute_value_field = Field::new("values", DataType::Utf8, true);
30
31 vec![
32 Field::new("seqname", DataType::Utf8, false),
34 Field::new("source", DataType::Utf8, true),
35 Field::new("type", DataType::Utf8, false),
36 Field::new("start", DataType::Int64, false),
37 Field::new("end", DataType::Int64, false),
38 Field::new("score", DataType::Float32, true),
39 Field::new("strand", DataType::Utf8, false),
40 Field::new("frame", DataType::Utf8, true),
41 Field::new_map(
42 "attributes",
43 "entries",
44 attribute_key_field,
45 attribute_value_field,
46 false,
47 true,
48 ),
49 ]
50}
51
52pub struct GTFConfig {
54 pub batch_size: usize,
56
57 pub file_schema: SchemaRef,
59
60 pub object_store: Arc<dyn ObjectStore>,
62
63 pub projection: Option<Vec<usize>>,
65}
66
67impl GTFConfig {
68 pub fn new(object_store: Arc<dyn ObjectStore>, file_schema: SchemaRef) -> Self {
70 Self {
71 file_schema,
72 object_store,
73 batch_size: DEFAULT_BATCH_SIZE,
74 projection: None,
75 }
76 }
77
78 pub fn with_schema(mut self, file_schema: SchemaRef) -> Self {
80 self.file_schema = file_schema;
81 self
82 }
83
84 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
86 self.batch_size = batch_size;
87 self
88 }
89
90 pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
92 self.projection = Some(projection);
93 self
94 }
95
96 pub fn with_some_projection(mut self, projection: Option<Vec<usize>>) -> Self {
98 self.projection = projection;
99 self
100 }
101}