query_synthetic/common/mod.rs
1//! Common utilities shared across zarr-datafusion examples
2//!
3//! This module provides helper functions for initializing tracing,
4//! creating DataFusion session contexts, and running queries.
5//!
6//! # Usage
7//!
8//! ```rust,ignore
9//! mod common;
10//!
11//! fn main() {
12//! common::init_tracing();
13//! let ctx = common::create_local_context();
14//! // ... use ctx ...
15//! }
16//! ```
17
18// Allow unused functions - this is a shared utility module and not all
19// functions are used by every example
20#![allow(dead_code)]
21
22use std::collections::HashMap;
23use std::sync::Arc;
24
25use datafusion::error::Result;
26use datafusion::execution::session_state::SessionStateBuilder;
27use datafusion::prelude::SessionContext;
28use tracing_subscriber::EnvFilter;
29use zarr_datafusion::datasource::factory::ZarrTableFactory;
30use zarr_datafusion::optimizer::{CountStatisticsRule, MinMaxStatisticsRule};
31
32/// Initialize the tracing subscriber with environment-based filtering.
33///
34/// Control log level via RUST_LOG environment variable:
35/// - `RUST_LOG=info` - info and above
36/// - `RUST_LOG=debug` - debug and above
37/// - `RUST_LOG=zarr_datafusion=debug` - only zarr_datafusion debug logs
38pub fn init_tracing() {
39 tracing_subscriber::fmt()
40 .with_env_filter(EnvFilter::from_default_env())
41 .with_target(true)
42 .with_line_number(true)
43 .init();
44}
45
46/// Create a SessionContext configured for local Zarr file access.
47///
48/// Includes the CountStatisticsRule and MinMaxStatisticsRule optimizers
49/// for efficient count(*) and min/max queries on coordinates.
50pub fn create_local_context() -> SessionContext {
51 let state = SessionStateBuilder::new()
52 .with_default_features()
53 .with_optimizer_rule(Arc::new(CountStatisticsRule::new()))
54 .with_optimizer_rule(Arc::new(MinMaxStatisticsRule::new()))
55 .build();
56 SessionContext::new_with_state(state)
57}
58
59/// Create a SessionContext configured for remote Zarr access (GCS, S3, etc).
60///
61/// Includes ZarrTableFactory for `CREATE EXTERNAL TABLE ... STORED AS ZARR`,
62/// plus CountStatisticsRule and MinMaxStatisticsRule optimizers.
63pub fn create_remote_context() -> SessionContext {
64 let state = SessionStateBuilder::new()
65 .with_default_features()
66 .with_table_factories(HashMap::from([(
67 "ZARR".to_string(),
68 Arc::new(ZarrTableFactory) as _,
69 )]))
70 .with_optimizer_rule(Arc::new(CountStatisticsRule::new()))
71 .with_optimizer_rule(Arc::new(MinMaxStatisticsRule::new()))
72 .build();
73 SessionContext::new_with_state(state)
74}
75
76/// Execute a SQL query and display results with a description.
77///
78/// Prints the description, SQL statement, and tabular results.
79pub async fn run_query(ctx: &SessionContext, description: &str, sql: &str) -> Result<()> {
80 println!("\n{description}");
81 println!("SQL: {sql}");
82 println!();
83 let df = ctx.sql(sql).await?;
84 df.show().await?;
85 Ok(())
86}