use std::sync::Arc;
use arrow::{
csv::{reader::Decoder, ReaderBuilder},
datatypes::SchemaRef,
};
use exon_common::DEFAULT_BATCH_SIZE;
use object_store::ObjectStore;
/// Settings used to build a tab-delimited Arrow CSV decoder, bundled with the
/// object store handle and an optional column projection.
///
/// NOTE(review): the name suggests this targets HMMER domain-table output;
/// confirm against the code that consumes this config.
pub struct HMMDomTabConfig {
/// Batch size handed to the CSV reader builder when a decoder is built.
pub batch_size: usize,
/// Schema the decoder is constructed with.
pub file_schema: SchemaRef,
/// Object store handle; stored here but not read by the methods on this type.
pub object_store: Arc<dyn ObjectStore>,
/// Optional projection as a list of indices (presumably column indices into
/// `file_schema` -- confirm with the consumer). `None` means no projection is set.
pub projection: Option<Vec<usize>>,
}
impl HMMDomTabConfig {
pub fn new(object_store: Arc<dyn ObjectStore>, file_schema: SchemaRef) -> Self {
Self {
object_store,
file_schema,
batch_size: DEFAULT_BATCH_SIZE,
projection: None,
}
}
pub fn build_decoder(&self) -> Decoder {
let builder = ReaderBuilder::new(Arc::clone(&self.file_schema))
.with_header(false)
.with_delimiter(b'\t')
.with_batch_size(self.batch_size);
builder.build_decoder()
}
pub fn with_batch_size(mut self, batch_size: usize) -> Self {
self.batch_size = batch_size;
self
}
pub fn with_some_projection(mut self, projection: Option<Vec<usize>>) -> Self {
self.projection = projection;
self
}
}