query_synthetic/
query_synthetic.rs

1//! Example: Query synthetic weather data using SQL
2//!
3//! This example demonstrates querying the synthetic weather dataset
4//! generated by `scripts/data_gen.py`. The data contains random
5//! temperature and humidity values for a 10x10 lat/lon grid over 7 days.
6//!
7//! Run with tracing enabled:
8//!   RUST_LOG=info cargo run --example query_synthetic
9//!   RUST_LOG=debug cargo run --example query_synthetic
10
11mod common;
12
13use std::sync::Arc;
14use zarr_datafusion::datasource::zarr::ZarrTable;
15use zarr_datafusion::reader::schema_inference::infer_schema_with_meta;
16
17#[tokio::main]
18async fn main() -> datafusion::error::Result<()> {
19    common::init_tracing();
20    let ctx = common::create_local_context();
21
22    // Load synthetic weather data (Zarr v3) with metadata for statistics
23    let store_path = "data/synthetic_v3.zarr";
24    let (schema, metadata) = infer_schema_with_meta(store_path).expect("Failed to infer schema");
25    let schema = Arc::new(schema);
26
27    println!("Synthetic Weather Data Schema:");
28    for field in schema.fields() {
29        println!("  {}: {:?}", field.name(), field.data_type());
30    }
31    println!("Total rows: {}", metadata.total_rows);
32
33    let table = Arc::new(ZarrTable::with_metadata(schema, store_path, metadata));
34    ctx.register_table("synthetic", table)?;
35
36    common::run_query(
37        &ctx,
38        "Sample data (first 10 rows):",
39        "SELECT * FROM synthetic LIMIT 10",
40    )
41    .await?;
42
43    common::run_query(
44        &ctx,
45        "Filtered data (temperature > 5):",
46        "SELECT time, lat, lon, temperature FROM synthetic WHERE temperature > 5 LIMIT 10",
47    )
48    .await?;
49
50    common::run_query(
51        &ctx,
52        "Average temperature per day:",
53        "SELECT time, AVG(temperature) as avg_temp FROM synthetic GROUP BY time ORDER BY time",
54    )
55    .await?;
56
57    common::run_query(
58        &ctx,
59        "Total rows (optimized - uses statistics, no data scan):",
60        "SELECT COUNT(temperature) as total FROM synthetic",
61    )
62    .await?;
63
64    common::run_query(
65        &ctx,
66        "Coordinate bounds (optimized - uses statistics, no data scan):",
67        "SELECT MIN(lat) as lat_min, MAX(lat) as lat_max, MIN(lon) as lon_min, MAX(lon) as lon_max FROM synthetic",
68    )
69    .await?;
70
71    Ok(())
72}