datafusion_tracing/lib.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17//
18// This product includes software developed at Datadog (https://www.datadoghq.com/) Copyright 2025 Datadog, Inc.
19
20//! DataFusion Tracing is an extension for [Apache DataFusion](https://datafusion.apache.org/) that helps you monitor and debug queries. It uses [`tracing`](https://docs.rs/tracing/latest/tracing/) and [OpenTelemetry](https://opentelemetry.io/) to gather DataFusion metrics, trace execution steps, and preview partial query results.
21//!
22//! **Note:** This is not an official Apache Software Foundation release.
23//!
24//! # Overview
25//!
26//! When you run queries with DataFusion Tracing enabled, it automatically adds tracing around execution steps, records all native DataFusion metrics such as execution time and output row count, lets you preview partial results for easier debugging, and integrates with OpenTelemetry for distributed tracing. This makes it simpler to understand and improve query performance.
27//!
28//! ## See it in action
29//!
30//! Here's what DataFusion Tracing can look like in practice:
31//!
32//! <details>
33//! <summary>Jaeger UI</summary>
34//!
35//! 
36//! </details>
37//!
38//! <details>
39//! <summary>Datadog UI</summary>
40//!
41//! 
42//! </details>
43//!
44//! # Getting Started
45//!
46//! ## Installation
47//!
48//! Include DataFusion Tracing in your project's `Cargo.toml`:
49//!
50//! ```toml
51//! [dependencies]
52//! datafusion = "52.0.0"
53//! datafusion-tracing = "52.0.0"
54//! ```
55//!
56//! ## Quick Start Example
57//!
58//! ```rust
59//! use datafusion::{
60//! arrow::{array::RecordBatch, util::pretty::pretty_format_batches},
61//! error::Result,
62//! execution::SessionStateBuilder,
63//! prelude::*,
64//! };
65//! use datafusion_tracing::{
66//! instrument_rules_with_info_spans, instrument_with_info_spans,
67//! pretty_format_compact_batch, InstrumentationOptions, RuleInstrumentationOptions,
68//! };
69//! use std::sync::Arc;
70//! use tracing::field;
71//!
72//! #[tokio::main]
73//! async fn main() -> Result<()> {
74//! // Initialize tracing subscriber as usual
75//! // (See examples/otlp.rs for a complete example).
76//!
77//! // Set up execution plan tracing options (you can customize these).
78//! let exec_options = InstrumentationOptions::builder()
79//! .record_metrics(true)
80//! .preview_limit(5)
81//! .preview_fn(Arc::new(|batch: &RecordBatch| {
82//! pretty_format_compact_batch(batch, 64, 3, 10).map(|fmt| fmt.to_string())
83//! }))
84//! .add_custom_field("env", "production")
85//! .add_custom_field("region", "us-west")
86//! .build();
87//!
88//! let instrument_rule = instrument_with_info_spans!(
89//! options: exec_options,
90//! env = field::Empty,
91//! region = field::Empty,
92//! );
93//!
94//! let session_state = SessionStateBuilder::new()
95//! .with_default_features()
96//! .with_physical_optimizer_rule(instrument_rule)
97//! .build();
98//!
99//! // Instrument all rules (analyzer, logical optimizer, physical optimizer)
100//! // Physical plan creation tracing is automatically enabled when physical_optimizer is set
101//! let rule_options = RuleInstrumentationOptions::full().with_plan_diff();
102//! let session_state = instrument_rules_with_info_spans!(
103//! options: rule_options,
104//! state: session_state
105//! );
106//!
107//! let ctx = SessionContext::new_with_state(session_state);
108//!
109//! // Execute a query - the entire lifecycle is now traced:
110//! // SQL Parsing -> Logical Plan -> Analyzer Rules -> Optimizer Rules ->
111//! // Physical Plan Creation -> Physical Optimizer Rules -> Execution
112//! let results = ctx.sql("SELECT 1").await?.collect().await?;
113//! println!(
114//! "Query Results:\n{}",
115//! pretty_format_batches(results.as_slice())?
116//! );
117//!
118//! Ok(())
119//! }
120//! ```
121//!
122//! A more complete example can be found in the [examples directory](https://github.com/datafusion-contrib/datafusion-tracing/tree/main/examples).
123//!
124
125// Execution plan instrumentation (wraps ExecutionPlan nodes with tracing)
126mod exec_instrument_macros;
127mod exec_instrument_rule;
128mod instrumented_exec;
129
130// Rule instrumentation (wraps analyzer/optimizer/physical optimizer rules with tracing)
131mod rule_instrumentation;
132mod rule_instrumentation_macros;
133
134// Shared utilities
135mod metrics;
136mod node;
137mod options;
138mod planner;
139mod preview;
140mod preview_utils;
141mod rule_options;
142mod utils;
143
144// Hide implementation details from documentation.
145// These functions are only public because they need to be accessed by the macros,
146// but they're not intended for direct use by consumers of this crate.
147#[doc(hidden)]
148pub use exec_instrument_rule::new_instrument_rule;
149#[doc(hidden)]
150pub use rule_instrumentation::instrument_session_state;
151
152pub use options::InstrumentationOptions;
153pub use preview_utils::pretty_format_compact_batch;
154pub use rule_options::RuleInstrumentationOptions;