use std::sync::Arc;
use crate::{
config::extract_config_from_state,
datasources::{exon_listing_table_options::ExonListingConfig, ScanFunction},
error::ExonError,
};
use datafusion::{
datasource::{
file_format::file_compression_type::FileCompressionType, function::TableFunctionImpl,
listing::ListingTableUrl, TableProvider,
},
error::{DataFusionError, Result},
execution::context::SessionContext,
logical_expr::Expr,
scalar::ScalarValue,
};
use exon_common::TableSchema;
use super::table_provider::{ListingVCFTable, ListingVCFTableOptions};
/// Table function that produces a [`ListingVCFTable`] provider for a VCF listing.
///
/// The function keeps hold of a [`SessionContext`]; its state is read each time the
/// function is called.
pub struct VCFScanFunction {
/// Session context whose state supplies the Exon config and is used for schema inference.
ctx: SessionContext,
}
impl std::fmt::Debug for VCFScanFunction {
    /// Formats the value as just its type name; the wrapped context is not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A struct formatter with no fields emits only the name, so write it directly.
        f.write_str("VCFScanFunction")
    }
}
impl VCFScanFunction {
pub fn new(ctx: SessionContext) -> Self {
Self { ctx }
}
}
impl TableFunctionImpl for VCFScanFunction {
fn call(&self, exprs: &[Expr]) -> Result<Arc<dyn TableProvider>> {
let listing_scan_function = ScanFunction::try_from(exprs)?;
let state = self.ctx.state();
let exon_config_extension = extract_config_from_state(&state)?;
let listing_table_options =
ListingVCFTableOptions::new(listing_scan_function.file_compression_type, false)
.with_parse_formats(exon_config_extension.vcf_parse_formats)
.with_parse_info(exon_config_extension.vcf_parse_info);
let schema = futures::executor::block_on(async {
let schema = listing_table_options
.infer_schema(&state, &listing_scan_function.listing_table_url)
.await?;
Ok::<TableSchema, datafusion::error::DataFusionError>(schema)
})?;
let listing_table_config = ExonListingConfig::new_with_options(
listing_scan_function.listing_table_url,
listing_table_options,
);
let listing_table = ListingVCFTable::new(listing_table_config, schema);
Ok(Arc::new(listing_table))
}
}
/// Table function that produces a [`ListingVCFTable`] provider restricted to one region.
///
/// It is called with a path literal and a region literal; the function keeps hold of a
/// [`SessionContext`] whose state is read on each call.
pub struct VCFIndexedScanFunction {
/// Session context whose state supplies the Exon config and is used for schema inference.
ctx: SessionContext,
}
impl std::fmt::Debug for VCFIndexedScanFunction {
    /// Formats the value as just its type name; the wrapped context is not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A struct formatter with no fields emits only the name, so write it directly.
        f.write_str("VCFIndexedScanFunction")
    }
}
impl VCFIndexedScanFunction {
pub fn new(ctx: SessionContext) -> Self {
Self { ctx }
}
}
impl TableFunctionImpl for VCFIndexedScanFunction {
    /// Builds a [`ListingVCFTable`] provider that scans a single region of a VCF listing.
    ///
    /// Expects two string-literal arguments: the listing path (parsed with
    /// [`ListingTableUrl::parse`]) followed by the region to restrict the scan to. The
    /// `vcf_parse_info` and `vcf_parse_formats` settings are read from the Exon config
    /// extension found in the session state, and the table schema is inferred from the
    /// listing URL before the provider is constructed.
    ///
    /// # Errors
    ///
    /// Returns [`DataFusionError::Internal`] when either argument is missing or is not a
    /// non-null `Utf8` literal, and propagates failures from URL parsing, region parsing,
    /// config extraction, and schema inference.
    fn call(&self, exprs: &[Expr]) -> Result<Arc<dyn TableProvider>> {
        let Some(Expr::Literal(ScalarValue::Utf8(Some(path)))) = exprs.first() else {
            return Err(DataFusionError::Internal(
                "this function requires the path to be specified as the first argument".into(),
            ));
        };
        let listing_table_url = ListingTableUrl::parse(path)?;

        let Some(Expr::Literal(ScalarValue::Utf8(Some(region_str)))) = exprs.get(1) else {
            return Err(DataFusionError::Internal(
                "this function requires the region to be specified as the second argument".into(),
            ));
        };
        let region = region_str.parse().map_err(ExonError::from)?;

        // Fetch the session state once and use it for both the config lookup and schema
        // inference, rather than asking the context for a second copy later on.
        let state = self.ctx.state();
        let exon_config_extension = extract_config_from_state(&state)?;

        // NOTE(review): compression is fixed to GZIP and the second argument is `true` here
        // (the non-indexed scan passes `false`) — presumably this marks the table as indexed;
        // confirm against `ListingVCFTableOptions::new`.
        let listing_table_options = ListingVCFTableOptions::new(FileCompressionType::GZIP, true)
            .with_regions(vec![region])
            .with_parse_info(exon_config_extension.vcf_parse_info)
            .with_parse_formats(exon_config_extension.vcf_parse_formats);

        // `call` is synchronous, so the async schema inference is driven to completion here.
        let schema: TableSchema = futures::executor::block_on(
            listing_table_options.infer_schema(&state, &listing_table_url),
        )?;

        let listing_table_config =
            ExonListingConfig::new_with_options(listing_table_url, listing_table_options);
        let listing_table = ListingVCFTable::new(listing_table_config, schema);

        Ok(Arc::new(listing_table))
    }
}