1use std::sync::Arc;
16
17use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
18use exon_common::TableSchemaBuilder;
19use object_store::ObjectStore;
20
21#[derive(Debug, Clone)]
23pub struct GFFConfig {
24 pub batch_size: usize,
26
27 pub file_schema: SchemaRef,
29
30 pub object_store: Arc<dyn ObjectStore>,
32
33 pub projection: Option<Vec<usize>>,
35}
36
37impl GFFConfig {
38 pub fn new(object_store: Arc<dyn ObjectStore>, file_schema: Arc<Schema>) -> Self {
40 Self {
41 file_schema,
42 object_store,
43 batch_size: 8096,
44 projection: None,
45 }
46 }
47
48 pub fn with_batch_size(mut self, batch_size: usize) -> Self {
50 self.batch_size = batch_size;
51 self
52 }
53
54 pub fn with_projection(mut self, projection: Vec<usize>) -> Self {
56 let file_projection = projection
57 .iter()
58 .filter(|f| **f < self.file_schema.fields().len())
59 .cloned()
60 .collect::<Vec<_>>();
61
62 self.projection = Some(file_projection);
63 self
64 }
65
66 pub fn projection(&self) -> Vec<usize> {
68 self.projection
69 .clone()
70 .unwrap_or_else(|| (0..self.file_schema.fields().len()).collect())
71 }
72
73 pub fn projected_schema(&self) -> arrow::error::Result<SchemaRef> {
75 let schema = self.file_schema.project(&self.projection())?;
76
77 Ok(Arc::new(schema))
78 }
79}
80
81pub fn new_gff_schema_builder() -> TableSchemaBuilder {
82 let attribute_key_field = Field::new("keys", DataType::Utf8, false);
83
84 let value_field = Field::new("item", DataType::Utf8, true);
86 let attribute_value_field = Field::new("values", DataType::List(Arc::new(value_field)), true);
87
88 let fields = vec![
89 Field::new("seqname", DataType::Utf8, false),
90 Field::new("source", DataType::Utf8, true),
91 Field::new("type", DataType::Utf8, false),
92 Field::new("start", DataType::Int64, false),
93 Field::new("end", DataType::Int64, false),
94 Field::new("score", DataType::Float32, true),
95 Field::new("strand", DataType::Utf8, false),
96 Field::new("phase", DataType::Utf8, true),
97 Field::new_map(
98 "attributes",
99 "entries",
100 attribute_key_field,
101 attribute_value_field,
102 false,
103 true,
104 ),
105 ];
106
107 TableSchemaBuilder::new_with_field_fields(fields)
108}