query_synthetic/common/
mod.rs

1//! Common utilities shared across zarr-datafusion examples
2//!
3//! This module provides helper functions for initializing tracing,
4//! creating DataFusion session contexts, and running queries.
5//!
6//! # Usage
7//!
8//! ```rust,ignore
9//! mod common;
10//!
11//! fn main() {
12//!     common::init_tracing();
13//!     let ctx = common::create_local_context();
14//!     // ... use ctx ...
15//! }
16//! ```
17
18// Allow unused functions - this is a shared utility module and not all
19// functions are used by every example
20#![allow(dead_code)]
21
22use std::collections::HashMap;
23use std::sync::Arc;
24
25use datafusion::error::Result;
26use datafusion::execution::session_state::SessionStateBuilder;
27use datafusion::prelude::SessionContext;
28use tracing_subscriber::EnvFilter;
29use zarr_datafusion::datasource::factory::ZarrTableFactory;
30use zarr_datafusion::optimizer::{CountStatisticsRule, MinMaxStatisticsRule};
31
32/// Initialize the tracing subscriber with environment-based filtering.
33///
34/// Control log level via RUST_LOG environment variable:
35/// - `RUST_LOG=info` - info and above
36/// - `RUST_LOG=debug` - debug and above
37/// - `RUST_LOG=zarr_datafusion=debug` - only zarr_datafusion debug logs
38pub fn init_tracing() {
39    tracing_subscriber::fmt()
40        .with_env_filter(EnvFilter::from_default_env())
41        .with_target(true)
42        .with_line_number(true)
43        .init();
44}
45
46/// Create a SessionContext configured for local Zarr file access.
47///
48/// Includes the CountStatisticsRule and MinMaxStatisticsRule optimizers
49/// for efficient count(*) and min/max queries on coordinates.
50pub fn create_local_context() -> SessionContext {
51    let state = SessionStateBuilder::new()
52        .with_default_features()
53        .with_optimizer_rule(Arc::new(CountStatisticsRule::new()))
54        .with_optimizer_rule(Arc::new(MinMaxStatisticsRule::new()))
55        .build();
56    SessionContext::new_with_state(state)
57}
58
59/// Create a SessionContext configured for remote Zarr access (GCS, S3, etc).
60///
61/// Includes ZarrTableFactory for `CREATE EXTERNAL TABLE ... STORED AS ZARR`,
62/// plus CountStatisticsRule and MinMaxStatisticsRule optimizers.
63pub fn create_remote_context() -> SessionContext {
64    let state = SessionStateBuilder::new()
65        .with_default_features()
66        .with_table_factories(HashMap::from([(
67            "ZARR".to_string(),
68            Arc::new(ZarrTableFactory) as _,
69        )]))
70        .with_optimizer_rule(Arc::new(CountStatisticsRule::new()))
71        .with_optimizer_rule(Arc::new(MinMaxStatisticsRule::new()))
72        .build();
73    SessionContext::new_with_state(state)
74}
75
76/// Execute a SQL query and display results with a description.
77///
78/// Prints the description, SQL statement, and tabular results.
79pub async fn run_query(ctx: &SessionContext, description: &str, sql: &str) -> Result<()> {
80    println!("\n{description}");
81    println!("SQL: {sql}");
82    println!();
83    let df = ctx.sql(sql).await?;
84    df.show().await?;
85    Ok(())
86}