pub struct ZarrTable { /* private fields */ }
Implementations§
Source§impl ZarrTable
impl ZarrTable
pub fn new(schema: SchemaRef, path: impl Into<String>) -> Self
Source
pub fn with_metadata(
schema: SchemaRef,
path: impl Into<String>,
metadata: ZarrStoreMeta,
) -> Self
pub fn with_metadata( schema: SchemaRef, path: impl Into<String>, metadata: ZarrStoreMeta, ) -> Self
Create a ZarrTable with store metadata (for local paths)
Examples found in repository?
examples/query_era5.rs (line 29)
14 async fn main() -> datafusion::error::Result<()> {
15 common::init_tracing();
16 let ctx = common::create_local_context();
17
18 // Load ERA5 data from Zarr v3 store with metadata for statistics
19 let store_path = "data/era5_v3.zarr";
20 let (schema, metadata) = infer_schema_with_meta(store_path).expect("Failed to infer schema");
21 let schema = Arc::new(schema);
22
23 println!("ERA5 Schema:");
24 for field in schema.fields() {
25 println!(" {}: {:?}", field.name(), field.data_type());
26 }
27 println!("Total rows: {}", metadata.total_rows);
28
29 let table = Arc::new(ZarrTable::with_metadata(schema, store_path, metadata));
30 ctx.register_table("era5", table)?;
31
32 // Query 1: Sample data overview
33 common::run_query(
34 &ctx,
35 "Sample ERA5 data (first 10 rows):",
36 "SELECT * FROM era5 LIMIT 10",
37 )
38 .await?;
39
40 // Query 2: Average temperature by hybrid level (pressure level)
41 common::run_query(
42 &ctx,
43 "Average temperature by hybrid level:",
44 "SELECT hybrid,
45 AVG(temperature) as avg_temp,
46 MIN(temperature) as min_temp,
47 MAX(temperature) as max_temp
48 FROM era5
49 GROUP BY hybrid
50 ORDER BY hybrid",
51 )
52 .await?;
53
54 // Query 3: Count (optimized - uses statistics)
55 common::run_query(
56 &ctx,
57 "Total rows (optimized - uses statistics, no data scan):",
58 "SELECT COUNT(*) as total FROM era5",
59 )
60 .await?;
61
62 Ok(())
63 }
More examples
examples/query_synthetic.rs (line 33)
18 async fn main() -> datafusion::error::Result<()> {
19 common::init_tracing();
20 let ctx = common::create_local_context();
21
22 // Load synthetic weather data (Zarr v3) with metadata for statistics
23 let store_path = "data/synthetic_v3.zarr";
24 let (schema, metadata) = infer_schema_with_meta(store_path).expect("Failed to infer schema");
25 let schema = Arc::new(schema);
26
27 println!("Synthetic Weather Data Schema:");
28 for field in schema.fields() {
29 println!(" {}: {:?}", field.name(), field.data_type());
30 }
31 println!("Total rows: {}", metadata.total_rows);
32
33 let table = Arc::new(ZarrTable::with_metadata(schema, store_path, metadata));
34 ctx.register_table("synthetic", table)?;
35
36 common::run_query(
37 &ctx,
38 "Sample data (first 10 rows):",
39 "SELECT * FROM synthetic LIMIT 10",
40 )
41 .await?;
42
43 common::run_query(
44 &ctx,
45 "Filtered data (temperature > 5):",
46 "SELECT time, lat, lon, temperature FROM synthetic WHERE temperature > 5 LIMIT 10",
47 )
48 .await?;
49
50 common::run_query(
51 &ctx,
52 "Average temperature per day:",
53 "SELECT time, AVG(temperature) as avg_temp FROM synthetic GROUP BY time ORDER BY time",
54 )
55 .await?;
56
57 common::run_query(
58 &ctx,
59 "Total rows (optimized - uses statistics, no data scan):",
60 "SELECT COUNT(temperature) as total FROM synthetic",
61 )
62 .await?;
63
64 common::run_query(
65 &ctx,
66 "Coordinate bounds (optimized - uses statistics, no data scan):",
67 "SELECT MIN(lat) as lat_min, MAX(lat) as lat_max, MIN(lon) as lon_min, MAX(lon) as lon_max FROM synthetic",
68 )
69 .await?;
70
71 Ok(())
72 }
Source
pub fn with_cached_remote(
schema: SchemaRef,
path: impl Into<String>,
store: AsyncReadableListableStorage,
prefix: ObjectPath,
metadata: ZarrStoreMeta,
) -> Self
pub fn with_cached_remote( schema: SchemaRef, path: impl Into<String>, store: AsyncReadableListableStorage, prefix: ObjectPath, metadata: ZarrStoreMeta, ) -> Self
Create a ZarrTable with a cached async store and metadata (for remote URLs)
Trait Implementations§
Source§impl TableProvider for ZarrTable
impl TableProvider for ZarrTable
Source§fn supports_filters_pushdown(
&self,
filters: &[&Expr],
) -> Result<Vec<TableProviderFilterPushDown>>
fn supports_filters_pushdown( &self, filters: &[&Expr], ) -> Result<Vec<TableProviderFilterPushDown>>
Indicate which filters can be pushed down to the scan.
Returns Inexact for all filters: coordinate equality filters are handled during the scan, but DataFusion should still apply the filters post-scan for correctness (in case the scan misses any).
Source§fn statistics(&self) -> Option<Statistics>
fn statistics(&self) -> Option<Statistics>
Return statistics for this table
This enables DataFusion’s optimizer to convert count(*) and count(column) queries into constant values without scanning the data.
For coordinate columns, we also provide:
- min_value/max_value: Enables MIN(coord)/MAX(coord) optimization
- distinct_count: Number of unique coordinate values
Source§fn as_any(&self) -> &dyn Any
fn as_any(&self) -> &dyn Any
Returns the table provider as Any so that it can be downcast to a specific implementation.
Source§fn table_type(&self) -> TableType
fn table_type(&self) -> TableType
Get the type of this table for metadata/catalog purposes.
Source§fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
projection: Option<&'life2 Vec<usize>>,
filters: &'life3 [Expr],
limit: Option<usize>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
projection: Option<&'life2 Vec<usize>>,
filters: &'life3 [Expr],
limit: Option<usize>,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
Create an ExecutionPlan for scanning the table with optionally specified projection, filter and limit, described below. Read more
Source§fn constraints(&self) -> Option<&Constraints>
fn constraints(&self) -> Option<&Constraints>
Get a reference to the constraints of the table.
Returns: Read more
Source§fn get_table_definition(&self) -> Option<&str>
fn get_table_definition(&self) -> Option<&str>
Get the create statement used to create this table, if available.
Source§fn get_logical_plan(&self) -> Option<Cow<'_, LogicalPlan>>
fn get_logical_plan(&self) -> Option<Cow<'_, LogicalPlan>>
Get the LogicalPlan of this table, if available.
Source§fn get_column_default(&self, _column: &str) -> Option<&Expr>
fn get_column_default(&self, _column: &str) -> Option<&Expr>
Get the default value for a column, if available.
Source§fn scan_with_args<'a, 'life0, 'life1, 'async_trait>(
&'life0 self,
state: &'life1 dyn Session,
args: ScanArgs<'a>,
) -> Pin<Box<dyn Future<Output = Result<ScanResult, DataFusionError>> + Send + 'async_trait>>
where
'a: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn scan_with_args<'a, 'life0, 'life1, 'async_trait>(
&'life0 self,
state: &'life1 dyn Session,
args: ScanArgs<'a>,
) -> Pin<Box<dyn Future<Output = Result<ScanResult, DataFusionError>> + Send + 'async_trait>>
where
'a: 'async_trait,
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Create an ExecutionPlan for scanning the table using structured arguments. Read more
Source§fn insert_into<'life0, 'life1, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
_input: Arc<dyn ExecutionPlan>,
_insert_op: InsertOp,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>, DataFusionError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
fn insert_into<'life0, 'life1, 'async_trait>(
&'life0 self,
_state: &'life1 dyn Session,
_input: Arc<dyn ExecutionPlan>,
_insert_op: InsertOp,
) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>, DataFusionError>> + Send + 'async_trait>>
where
'life0: 'async_trait,
'life1: 'async_trait,
Self: 'async_trait,
Return an ExecutionPlan to insert data into this table, if supported. Read more
Auto Trait Implementations§
impl Freeze for ZarrTable
impl !RefUnwindSafe for ZarrTable
impl Send for ZarrTable
impl Sync for ZarrTable
impl Unpin for ZarrTable
impl !UnwindSafe for ZarrTable
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more