ZarrTable

Struct ZarrTable 

Source
pub struct ZarrTable { /* private fields */ }

Implementations§

Source§

impl ZarrTable

Source

pub fn new(schema: SchemaRef, path: impl Into<String>) -> Self

Source

pub fn with_metadata( schema: SchemaRef, path: impl Into<String>, metadata: ZarrStoreMeta, ) -> Self

Create a ZarrTable with store metadata (for local paths)

Examples found in repository?
examples/query_era5.rs (line 29)
14async fn main() -> datafusion::error::Result<()> {
15    common::init_tracing();
16    let ctx = common::create_local_context();
17
18    // Load ERA5 data from Zarr v3 store with metadata for statistics
19    let store_path = "data/era5_v3.zarr";
20    let (schema, metadata) = infer_schema_with_meta(store_path).expect("Failed to infer schema");
21    let schema = Arc::new(schema);
22
23    println!("ERA5 Schema:");
24    for field in schema.fields() {
25        println!("  {}: {:?}", field.name(), field.data_type());
26    }
27    println!("Total rows: {}", metadata.total_rows);
28
29    let table = Arc::new(ZarrTable::with_metadata(schema, store_path, metadata));
30    ctx.register_table("era5", table)?;
31
32    // Query 1: Sample data overview
33    common::run_query(
34        &ctx,
35        "Sample ERA5 data (first 10 rows):",
36        "SELECT * FROM era5 LIMIT 10",
37    )
38    .await?;
39
40    // Query 2: Average temperature by hybrid level (pressure level)
41    common::run_query(
42        &ctx,
43        "Average temperature by hybrid level:",
44        "SELECT hybrid,
45                AVG(temperature) as avg_temp,
46                MIN(temperature) as min_temp,
47                MAX(temperature) as max_temp
48         FROM era5
49         GROUP BY hybrid
50         ORDER BY hybrid",
51    )
52    .await?;
53
54    // Query 3: Count (optimized - uses statistics)
55    common::run_query(
56        &ctx,
57        "Total rows (optimized - uses statistics, no data scan):",
58        "SELECT COUNT(*) as total FROM era5",
59    )
60    .await?;
61
62    Ok(())
63}
More examples
Hide additional examples
examples/query_synthetic.rs (line 33)
18async fn main() -> datafusion::error::Result<()> {
19    common::init_tracing();
20    let ctx = common::create_local_context();
21
22    // Load synthetic weather data (Zarr v3) with metadata for statistics
23    let store_path = "data/synthetic_v3.zarr";
24    let (schema, metadata) = infer_schema_with_meta(store_path).expect("Failed to infer schema");
25    let schema = Arc::new(schema);
26
27    println!("Synthetic Weather Data Schema:");
28    for field in schema.fields() {
29        println!("  {}: {:?}", field.name(), field.data_type());
30    }
31    println!("Total rows: {}", metadata.total_rows);
32
33    let table = Arc::new(ZarrTable::with_metadata(schema, store_path, metadata));
34    ctx.register_table("synthetic", table)?;
35
36    common::run_query(
37        &ctx,
38        "Sample data (first 10 rows):",
39        "SELECT * FROM synthetic LIMIT 10",
40    )
41    .await?;
42
43    common::run_query(
44        &ctx,
45        "Filtered data (temperature > 5):",
46        "SELECT time, lat, lon, temperature FROM synthetic WHERE temperature > 5 LIMIT 10",
47    )
48    .await?;
49
50    common::run_query(
51        &ctx,
52        "Average temperature per day:",
53        "SELECT time, AVG(temperature) as avg_temp FROM synthetic GROUP BY time ORDER BY time",
54    )
55    .await?;
56
57    common::run_query(
58        &ctx,
59        "Total rows (optimized - uses statistics, no data scan):",
60        "SELECT COUNT(temperature) as total FROM synthetic",
61    )
62    .await?;
63
64    common::run_query(
65        &ctx,
66        "Coordinate bounds (optimized - uses statistics, no data scan):",
67        "SELECT MIN(lat) as lat_min, MAX(lat) as lat_max, MIN(lon) as lon_min, MAX(lon) as lon_max FROM synthetic",
68    )
69    .await?;
70
71    Ok(())
72}
Source

pub fn with_cached_remote( schema: SchemaRef, path: impl Into<String>, store: AsyncReadableListableStorage, prefix: ObjectPath, metadata: ZarrStoreMeta, ) -> Self

Create a ZarrTable with a cached async store and metadata (for remote URLs)

Trait Implementations§

Source§

impl Debug for ZarrTable

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl TableProvider for ZarrTable

Source§

fn supports_filters_pushdown( &self, filters: &[&Expr], ) -> Result<Vec<TableProviderFilterPushDown>>

Indicate which filters can be pushed down to the scan

Returns Inexact for every filter: coordinate equality filters are applied during the scan, but DataFusion still re-applies all filters after the scan, so results remain correct even if the scan does not fully honor one of them.

Source§

fn statistics(&self) -> Option<Statistics>

Return statistics for this table

This enables DataFusion’s optimizer to convert count(*) and count(column) queries into constant values without scanning the data.

For coordinate columns, we also provide:

  • min_value/max_value: Enables MIN(coord)/MAX(coord) optimization
  • distinct_count: Number of unique coordinate values
Source§

fn as_any(&self) -> &dyn Any

Returns the table provider as Any so that it can be downcast to a specific implementation.
Source§

fn schema(&self) -> SchemaRef

Get a reference to the schema for this table
Source§

fn table_type(&self) -> TableType

Get the type of this table for metadata/catalog purposes.
Source§

fn scan<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, _state: &'life1 dyn Session, projection: Option<&'life2 Vec<usize>>, filters: &'life3 [Expr], limit: Option<usize>, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Create an ExecutionPlan for scanning the table with optionally specified projection, filter and limit, described below. Read more
Source§

fn constraints(&self) -> Option<&Constraints>

Get a reference to the constraints of the table. Returns: Read more
Source§

fn get_table_definition(&self) -> Option<&str>

Get the create statement used to create this table, if available.
Source§

fn get_logical_plan(&self) -> Option<Cow<'_, LogicalPlan>>

Get the LogicalPlan of this table, if available.
Source§

fn get_column_default(&self, _column: &str) -> Option<&Expr>

Get the default value for a column, if available.
Source§

fn scan_with_args<'a, 'life0, 'life1, 'async_trait>( &'life0 self, state: &'life1 dyn Session, args: ScanArgs<'a>, ) -> Pin<Box<dyn Future<Output = Result<ScanResult, DataFusionError>> + Send + 'async_trait>>
where 'a: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

Create an ExecutionPlan for scanning the table using structured arguments. Read more
Source§

fn insert_into<'life0, 'life1, 'async_trait>( &'life0 self, _state: &'life1 dyn Session, _input: Arc<dyn ExecutionPlan>, _insert_op: InsertOp, ) -> Pin<Box<dyn Future<Output = Result<Arc<dyn ExecutionPlan>, DataFusionError>> + Send + 'async_trait>>
where 'life0: 'async_trait, 'life1: 'async_trait, Self: 'async_trait,

Return an ExecutionPlan to insert data into this table, if supported. Read more

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of the pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> PolicyExt for T
where T: ?Sized,

Source§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow only if self and other return Action::Follow. Read more
Source§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns Action::Follow if either self or other returns Action::Follow. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V

Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more