use std::sync::Arc;
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use object_store::ObjectStore;
/// Incrementally assembles the Arrow schema used for GFF data.
///
/// Fields are stored in insertion order; `build` consumes the builder and
/// wraps the collected fields in a shared schema.
pub struct GFFSchemaBuilder {
/// Fields collected so far, in the order they will appear in the schema.
fields: Vec<Field>,
}
impl GFFSchemaBuilder {
pub fn new(fields: Vec<Field>) -> Self {
Self { fields }
}
pub fn append(mut self, field: Field) -> Self {
self.fields.push(field);
self
}
pub fn extend(mut self, fields: Vec<Field>) -> Self {
self.fields.extend(fields);
self
}
pub fn build(self) -> SchemaRef {
Arc::new(Schema::new(self.fields))
}
}
impl Default for GFFSchemaBuilder {
fn default() -> Self {
let attribute_key_field = Field::new("keys", DataType::Utf8, false);
let value_field = Field::new("item", DataType::Utf8, true);
let attribute_value_field =
Field::new("values", DataType::List(Arc::new(value_field)), true);
let fields = vec![
Field::new("seqname", DataType::Utf8, false),
Field::new("source", DataType::Utf8, true),
Field::new("type", DataType::Utf8, false),
Field::new("start", DataType::Int64, false),
Field::new("end", DataType::Int64, false),
Field::new("score", DataType::Float32, true),
Field::new("strand", DataType::Utf8, false),
Field::new("phase", DataType::Utf8, true),
Field::new_map(
"attributes",
"entries",
attribute_key_field,
attribute_value_field,
false,
true,
),
];
Self::new(fields)
}
}
/// Configuration bundle for a GFF data source.
///
/// Created by `GFFConfig::new` and adjusted through the chained `with_*`
/// methods.
pub struct GFFConfig {
/// Batch size setting. Presumably the number of rows per record batch
/// produced by the reader; the consumer is not in this file, so confirm.
pub batch_size: usize,
/// Arrow schema describing the columns of the file.
pub file_schema: SchemaRef,
/// Shared object store handle. Presumably the store the GFF data is read
/// from; confirm against the reader.
pub object_store: Arc<dyn ObjectStore>,
/// Optional column indices into `file_schema`; `None` means no projection
/// has been set.
pub projection: Option<Vec<usize>>,
}
impl GFFConfig {
    /// Creates a configuration for `object_store` with the default GFF
    /// schema, a batch size of 8096 and no projection.
    pub fn new(object_store: Arc<dyn ObjectStore>) -> Self {
        Self {
            // NOTE(review): 8096 is not a power of two; 8192 may have been
            // intended. Left unchanged because callers may rely on it.
            batch_size: 8096,
            file_schema: GFFSchemaBuilder::default().build(),
            object_store,
            projection: None,
        }
    }

    /// Replaces the file schema and returns the updated configuration.
    pub fn with_schema(self, file_schema: SchemaRef) -> Self {
        Self {
            file_schema,
            ..self
        }
    }

    /// Replaces the batch size and returns the updated configuration.
    pub fn with_batch_size(self, batch_size: usize) -> Self {
        Self { batch_size, ..self }
    }

    /// Stores `projection` as the column selection, keeping only indices that
    /// refer to a field of the current file schema.
    ///
    /// Out-of-range indices are dropped silently and the relative order of
    /// the remaining indices is preserved. The check uses the schema held at
    /// the time of the call, so apply `with_schema` before this method if a
    /// custom schema is wanted.
    pub fn with_projection(self, projection: Vec<usize>) -> Self {
        let field_count = self.file_schema.fields().len();
        let mut in_range = projection;
        in_range.retain(|&index| index < field_count);
        Self {
            projection: Some(in_range),
            ..self
        }
    }
}