use std::sync::Arc;
use crate::{
datasources::{exon_listing_table_options::ExonListingConfig, ScanFunction},
error::ExonError,
ExonRuntimeEnvExt,
};
use datafusion::{
datasource::{
file_format::file_compression_type::FileCompressionType, function::TableFunctionImpl,
listing::ListingTableUrl, TableProvider,
},
error::{DataFusionError, Result},
execution::context::SessionContext,
logical_expr::Expr,
scalar::ScalarValue,
};
use exon_gff::new_gff_schema_builder;
use super::table_provider::{ListingGFFTable, ListingGFFTableOptions};
/// Table function that produces a non-indexed GFF listing table.
///
/// Its `TableFunctionImpl::call` implementation converts the call arguments
/// into a `ScanFunction` and returns a `ListingGFFTable` configured with
/// `with_indexed(false)`.
pub struct GFFScanFunction {
/// Session context; its runtime environment is used to register the object
/// store for the URL being scanned.
ctx: SessionContext,
}
/// Debug output is only the type name; the wrapped `SessionContext` is not printed.
impl std::fmt::Debug for GFFScanFunction {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A `debug_struct` with no fields renders as the bare name, so emit it directly.
        formatter.write_str("GFFScanFunction")
    }
}
impl GFFScanFunction {
pub fn new(ctx: SessionContext) -> Self {
Self { ctx }
}
}
impl TableFunctionImpl for GFFScanFunction {
fn call(&self, exprs: &[Expr]) -> Result<Arc<dyn TableProvider>> {
let listing_scan_function = ScanFunction::try_from(exprs)?;
futures::executor::block_on(async {
self.ctx
.runtime_env()
.exon_register_object_store_url(listing_scan_function.listing_table_url.as_ref())
.await
})?;
let schema = new_gff_schema_builder().build();
let listing_table_options =
ListingGFFTableOptions::new(listing_scan_function.file_compression_type)
.with_indexed(false);
let listing_table_config = ExonListingConfig::new_with_options(
listing_scan_function.listing_table_url,
listing_table_options,
);
let listing_table = ListingGFFTable::new(listing_table_config, schema);
Ok(Arc::new(listing_table))
}
}
/// Table function that produces an indexed, region-restricted GFF listing table.
///
/// Its `TableFunctionImpl::call` implementation takes a path and a region as
/// string literals and returns a `ListingGFFTable` configured with
/// `with_indexed(true)` and the parsed region.
pub struct GFFIndexedScanFunction {
/// Session context; its runtime environment is used to register the object
/// store for the URL being scanned.
ctx: SessionContext,
}
/// Debug output is only the type name; the wrapped `SessionContext` is not printed.
impl std::fmt::Debug for GFFIndexedScanFunction {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A `debug_struct` with no fields renders as the bare name, so emit it directly.
        formatter.write_str("GFFIndexedScanFunction")
    }
}
impl GFFIndexedScanFunction {
pub fn new(ctx: SessionContext) -> Self {
Self { ctx }
}
}
impl TableFunctionImpl for GFFIndexedScanFunction {
    /// Builds an indexed, region-restricted GFF listing table.
    ///
    /// Expects two string literal arguments: the path (or URL) of the file and
    /// the region to query. The resulting table is configured with
    /// `FileCompressionType::GZIP`, `with_indexed(true)` and the parsed region.
    ///
    /// # Errors
    ///
    /// Returns `DataFusionError::Plan` when either argument is missing or is
    /// not a non-null UTF-8 literal. Also fails if the path cannot be parsed
    /// as a listing URL, if the region string cannot be parsed, or if
    /// registering the object store for the URL fails.
    fn call(&self, exprs: &[Expr]) -> Result<Arc<dyn TableProvider>> {
        // Bad arguments are a caller mistake, so they are reported as planning
        // errors rather than `Internal`, which DataFusion reserves for its own bugs.
        let Some(Expr::Literal(ScalarValue::Utf8(Some(path)))) = exprs.first() else {
            return Err(DataFusionError::Plan(
                "this function requires the path to be specified as the first argument".into(),
            ));
        };
        let Some(Expr::Literal(ScalarValue::Utf8(Some(region_str)))) = exprs.get(1) else {
            return Err(DataFusionError::Plan(
                "this function requires the region to be specified as the second argument".into(),
            ));
        };

        // Parse both arguments before registering the object store, so an
        // invalid call fails without performing that side effect.
        let listing_table_url = ListingTableUrl::parse(path)?;
        let region = region_str.parse().map_err(ExonError::from)?;
        let listing_table_options = ListingGFFTableOptions::new(FileCompressionType::GZIP)
            .with_indexed(true)
            .with_region(region);

        // `call` is synchronous, so the async registration is driven to
        // completion on the current thread.
        futures::executor::block_on(async {
            self.ctx
                .runtime_env()
                .exon_register_object_store_url(listing_table_url.as_ref())
                .await
        })?;

        let listing_table_config =
            ExonListingConfig::new_with_options(listing_table_url, listing_table_options);
        let schema = new_gff_schema_builder().build();
        let listing_table = ListingGFFTable::new(listing_table_config, schema);

        Ok(Arc::new(listing_table))
    }
}