query_era5/
query_era5.rs

1//! Example: Query ERA5 climate data using SQL
2//!
3//! Run with tracing enabled:
4//!   RUST_LOG=info cargo run --example query_era5
5//!   RUST_LOG=debug cargo run --example query_era5
6
7mod common;
8
9use std::sync::Arc;
10use zarr_datafusion::datasource::zarr::ZarrTable;
11use zarr_datafusion::reader::schema_inference::infer_schema_with_meta;
12
13#[tokio::main]
14async fn main() -> datafusion::error::Result<()> {
15    common::init_tracing();
16    let ctx = common::create_local_context();
17
18    // Load ERA5 data from Zarr v3 store with metadata for statistics
19    let store_path = "data/era5_v3.zarr";
20    let (schema, metadata) = infer_schema_with_meta(store_path).expect("Failed to infer schema");
21    let schema = Arc::new(schema);
22
23    println!("ERA5 Schema:");
24    for field in schema.fields() {
25        println!("  {}: {:?}", field.name(), field.data_type());
26    }
27    println!("Total rows: {}", metadata.total_rows);
28
29    let table = Arc::new(ZarrTable::with_metadata(schema, store_path, metadata));
30    ctx.register_table("era5", table)?;
31
32    // Query 1: Sample data overview
33    common::run_query(
34        &ctx,
35        "Sample ERA5 data (first 10 rows):",
36        "SELECT * FROM era5 LIMIT 10",
37    )
38    .await?;
39
40    // Query 2: Average temperature by hybrid level (pressure level)
41    common::run_query(
42        &ctx,
43        "Average temperature by hybrid level:",
44        "SELECT hybrid,
45                AVG(temperature) as avg_temp,
46                MIN(temperature) as min_temp,
47                MAX(temperature) as max_temp
48         FROM era5
49         GROUP BY hybrid
50         ORDER BY hybrid",
51    )
52    .await?;
53
54    // Query 3: Count (optimized - uses statistics)
55    common::run_query(
56        &ctx,
57        "Total rows (optimized - uses statistics, no data scan):",
58        "SELECT COUNT(*) as total FROM era5",
59    )
60    .await?;
61
62    Ok(())
63}