datafusion_tracing/lib.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17//
18// This product includes software developed at Datadog (https://www.datadoghq.com/) Copyright 2025 Datadog, Inc.
19
20//! DataFusion Tracing is an extension for [Apache DataFusion](https://datafusion.apache.org/) that helps you monitor and debug queries. It uses [`tracing`](https://docs.rs/tracing/latest/tracing/) and [OpenTelemetry](https://opentelemetry.io/) to gather DataFusion metrics, trace execution steps, and preview partial query results.
21//!
22//! **Note:** This is not an official Apache Software Foundation release.
23//!
24//! # Overview
25//!
26//! When you run queries with DataFusion Tracing enabled, it automatically adds tracing around execution steps, records all native DataFusion metrics such as execution time and output row count, lets you preview partial results for easier debugging, and integrates with OpenTelemetry for distributed tracing. This makes it simpler to understand and improve query performance.
27//!
28//! ## See it in action
29//!
30//! Here's what DataFusion Tracing can look like in practice:
31//!
32//! <details>
33//! <summary>Jaeger UI</summary>
34//!
35//! 
36//! </details>
37//!
38//! <details>
39//! <summary>Datadog UI</summary>
40//!
41//! 
42//! </details>
43//!
44//! # Getting Started
45//!
46//! ## Installation
47//!
48//! Include DataFusion Tracing in your project's `Cargo.toml`:
49//!
50//! ```toml
51//! [dependencies]
52//! datafusion = "50.0.0"
53//! datafusion-tracing = "50.0.2"
54//! ```
55//!
56//! ## Compatibility note
57//! The ellipsis truncation indicator in `pretty_format_compact_batch` is disabled in this version
58//! because it requires `comfy-table >= 7.1.4`, while Apache Arrow currently pins `comfy-table` to
59//! `7.1.2` to preserve its MSRV. Context: `comfy-table 7.2.0` bumped MSRV to Rust 1.85 while Arrow
60//! remains at 1.84. See [arrow-rs issue #8243](https://github.com/apache/arrow-rs/issues/8243)
61//! and [PR #8244](https://github.com/apache/arrow-rs/pull/8244). Arrow chose an exact pin; a tilde
62//! requirement (`~7.1`) would have preserved the MSRV equally well while still allowing any 7.1.x
63//! release (including 7.1.4). We will re-enable the truncation indicator once Arrow relaxes its pin to allow `>= 7.1.4`.
64//!
65//! ## Quick Start Example
66//!
67//! ```rust
68//! use datafusion::{
69//! arrow::{array::RecordBatch, util::pretty::pretty_format_batches},
70//! error::Result,
71//! execution::SessionStateBuilder,
72//! prelude::*,
73//! };
74//! use datafusion_tracing::{
75//! instrument_with_info_spans, pretty_format_compact_batch, InstrumentationOptions,
76//! };
77//! use std::sync::Arc;
78//! use tracing::field;
79//!
80//! #[tokio::main]
81//! async fn main() -> Result<()> {
82//! // Initialize tracing subscriber as usual
83//! // (See examples/otlp.rs for a complete example).
84//!
85//! // Set up tracing options (you can customize these).
86//! let options = InstrumentationOptions::builder()
87//! .record_metrics(true)
88//! .preview_limit(5)
89//! .preview_fn(Arc::new(|batch: &RecordBatch| {
90//! pretty_format_compact_batch(batch, 64, 3, 10).map(|fmt| fmt.to_string())
91//! }))
92//! .add_custom_field("env", "production")
93//! .add_custom_field("region", "us-west")
94//! .build();
95//!
96//! let instrument_rule = instrument_with_info_spans!(
97//! options: options,
98//! env = field::Empty,
99//! region = field::Empty,
100//! );
101//!
102//! let session_state = SessionStateBuilder::new()
103//! .with_default_features()
104//! .with_physical_optimizer_rule(instrument_rule)
105//! .build();
106//!
107//! let ctx = SessionContext::new_with_state(session_state);
108//!
109//! let results = ctx.sql("SELECT 1").await?.collect().await?;
110//! println!(
111//! "Query Results:\n{}",
112//! pretty_format_batches(results.as_slice())?
113//! );
114//!
115//! Ok(())
116//! }
117//! ```
118//!
119//! A more complete example can be found in the [examples directory](https://github.com/datafusion-contrib/datafusion-tracing/tree/main/examples).
120//!
121//! ## Optimizer rule ordering (put instrumentation last)
122//!
123//! Always register the instrumentation rule last in your physical optimizer chain.
124//!
125//! - Many optimizer rules identify nodes using `as_any().downcast_ref::<ConcreteExec>()`.
126//! Since instrumentation wraps each node in a private `InstrumentedExec`, those downcasts
127//! won’t match if instrumentation runs first, causing rules to be skipped or, in code
128//! that assumes success, to panic.
129//! - Some rules may rewrite parts of the plan after instrumentation. While `InstrumentedExec`
130//! re-wraps many common mutations, placing the rule last guarantees full, consistent
131//! coverage regardless of other rules’ behaviors.
132//!
133//! Why is `InstrumentedExec` private?
134//!
135//! - To prevent downstream code from downcasting to or unwrapping the wrapper, which would be
136//! brittle and force long-term compatibility constraints on its internals. The public
137//! contract is the optimizer rule, not the concrete node.
138//!
139//! How to ensure it is last:
140//!
141//! - When chaining: `builder.with_physical_optimizer_rule(rule_a)
142//! .with_physical_optimizer_rule(rule_b)
143//! .with_physical_optimizer_rule(instrument_rule)`
144//! - Or collect: `builder.with_physical_optimizer_rules(vec![..., instrument_rule])`
145//!
146
147mod instrument_rule;
148mod instrumented;
149mod instrumented_macros;
150mod metrics;
151mod node;
152mod options;
153mod preview;
154mod preview_utils;
155mod utils;
156
157// Hide implementation details from documentation.
158// This function is only public because it needs to be accessed by the macros,
159// but it's not intended for direct use by consumers of this crate.
160#[doc(hidden)]
161pub use instrument_rule::new_instrument_rule;
162
163pub use options::InstrumentationOptions;
164pub use preview_utils::pretty_format_compact_batch;