datafusion_tracing/
preview_utils.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17//
18// This product includes software developed at Datadog (https://www.datadoghq.com/) Copyright 2025 Datadog, Inc.
19
20use comfy_table::{Cell, ContentArrangement, Table};
21use datafusion::arrow::{
22    array::RecordBatch,
23    error::ArrowError,
24    util::display::{ArrayFormatter, FormatOptions},
25};
26use std::fmt::Display;
27use unicode_width::UnicodeWidthStr;
28
/// comfy-table preset string loaded when every column of the batch is displayed.
const DEFAULT_PRESET: &str = "||--|=+||-+||++++++";
/// Preset loaded when some columns had to be dropped. It is identical to
/// `DEFAULT_PRESET` except that five characters are replaced by `…`
/// (presumably the right-edge components in comfy-table's preset layout —
/// TODO confirm), so the rendered table visibly signals the omitted columns.
const TRUNCATED_PRESET: &str = "|…--|=+…|-+|…+++…+…";
31
32/// Formats a `RecordBatch` as a neatly aligned ASCII table,
33/// constraining the total width to `max_width`. Columns are
34/// dynamically resized or truncated, and columns that cannot
35/// fit within the given width may be dropped.
36pub fn pretty_format_compact_batch(
37    batch: &RecordBatch,
38    max_width: usize,
39    max_row_height: usize,
40    min_compacted_col_width: usize,
41) -> Result<impl Display, ArrowError> {
42    let schema = batch.schema();
43    let total_fields = schema.fields().len();
44    let format_opts = FormatOptions::default().with_display_error(true);
45
46    // Initialize header and column formatters
47    let header: Vec<Cell> = schema
48        .fields()
49        .iter()
50        .map(|f| Cell::new(f.name()))
51        .collect();
52    let formatters: Vec<_> = batch
53        .columns()
54        .iter()
55        .map(|col| ArrayFormatter::try_new(col.as_ref(), &format_opts))
56        .collect::<Result<_, ArrowError>>()?;
57
58    // Generate the 2d array of formatted values
59    let formatted_values: Vec<Vec<Cell>> = (0..batch.num_rows())
60        .map(|row_idx| {
61            formatters
62                .iter()
63                .map(|fmt| Cell::new(fmt.value(row_idx)))
64                .collect()
65        })
66        .collect();
67
68    // Compute column widths as the maximum width of each cell for that column, header included
69    let mut column_widths = vec![0; total_fields];
70    for row in std::iter::once(&header).chain(formatted_values.iter()) {
71        for (col_idx, cell) in row.iter().enumerate() {
72            let cell_width = cell.content().width() + 3; // +3 for left separator + left padding + right padding
73            column_widths[col_idx] = column_widths[col_idx].max(cell_width);
74        }
75    }
76
77    // Count how many columns fit within the maximum table width constraint
78    let nb_displayed_columns = if max_width == 0 {
79        // no constraint: all columns fit
80        total_fields
81    } else {
82        let mut table_width = 1; // initial width of 1 for the rightmost table separator
83        let mut fit_columns = 0; // number of columns that fit within max_width: the remaining columns will be dropped
84        for width in column_widths {
85            let col_width = width.min(min_compacted_col_width).max(4); // lower bound of 4 for each column width: left separator + left padding + data + right padding
86            if table_width + col_width > max_width {
87                break;
88            }
89            table_width += col_width;
90            fit_columns += 1;
91        }
92        fit_columns
93    };
94
95    // Adjust the preset for eventual truncated columns
96    let table_preset = if nb_displayed_columns == total_fields {
97        DEFAULT_PRESET
98    } else {
99        TRUNCATED_PRESET
100    };
101
102    // Build the final adjusted table
103    let mut table = Table::new();
104    table
105        .force_no_tty()
106        .load_preset(table_preset)
107        .set_content_arrangement(ContentArrangement::Dynamic)
108        .set_header(header.into_iter().take(nb_displayed_columns));
109
110    // Ellipsis truncation indicator requires comfy-table >= 7.1.4.
111    // Arrow currently pins comfy-table to 7.1.2 to preserve its MSRV
112    // (comfy-table 7.2.0 bumped MSRV to Rust 1.85 while Arrow remains at 1.84).
113    // See https://github.com/apache/arrow-rs/issues/8243 and https://github.com/apache/arrow-rs/pull/8244.
114    // Arrow chose an exact pin instead of a `~7.1` requirement; the latter would
115    // also preserve MSRV while allowing 7.1.x (including 7.1.4).
116    // Re-enable once Arrow relaxes this pin to allow >= 7.1.4.
117    //table.set_truncation_indicator("…");
118
119    for formatted_row in formatted_values {
120        table.add_row(formatted_row.into_iter().take(nb_displayed_columns));
121    }
122
123    // Apply row height and table width constraints if provided
124    if max_row_height > 0 {
125        for row in table.row_iter_mut() {
126            row.max_height(max_row_height);
127        }
128    }
129
130    if max_width > 0 {
131        table.set_width(max_width as u16);
132    }
133
134    Ok(table.to_string())
135}
136
#[cfg(test)]
mod tests {
    use super::pretty_format_compact_batch;
    use datafusion::arrow::array::{Int64Array, StringArray};
    use datafusion::arrow::datatypes::{DataType, Field, Schema};
    use datafusion::arrow::error::ArrowError;
    use datafusion::arrow::record_batch::RecordBatch;
    use insta::{assert_snapshot, Settings};
    use std::sync::Arc;

    /// Snapshot settings shared by all tests: a copy of the current settings
    /// with module-path prefixing of snapshot names turned off.
    fn insta_settings() -> Settings {
        let mut snapshot_settings = Settings::clone_current();
        snapshot_settings.set_prepend_module_to_snapshot(false);
        snapshot_settings
    }

    /// Builds the three-row, seven-column batch used by every test. It mixes
    /// integers, short strings, very long strings, emojis and CJK text.
    fn create_sample_batch() -> RecordBatch {
        // All sample fields are non-nullable; only the data type varies.
        let int64_field = |name: &str| Field::new(name, DataType::Int64, false);
        let utf8_field = |name: &str| Field::new(name, DataType::Utf8, false);

        let ids = Arc::new(Int64Array::from(vec![1, 3, 7]));
        let countries =
            Arc::new(StringArray::from(vec!["France", "United Kingdom", "Spain"]));
        let descriptions = Arc::new(StringArray::from(vec![
            "Paris is renowned as the City of Light, celebrated for its rich history, magnificent architecture, and vibrant arts scene. The city boasts iconic landmarks such as the Eiffel Tower and the Louvre, along with charming streets, quaint cafés, and a deep cultural heritage that continues to inspire artists, writers, and travelers from around the world.",
            "London is a dynamic and cosmopolitan metropolis that seamlessly blends its storied past with modern innovation. The city offers an array of historical sites, diverse neighborhoods, and world-class museums and theaters. Its bustling markets, green parks, and ever-evolving cultural scene make London a hub of creativity, commerce, and community life.",
            "Barcelona is a lively coastal city known for its striking modernist architecture, Mediterranean beaches, and eclectic cultural offerings. From the whimsical creations of Antoni Gaudí to the vibrant street life and renowned culinary delights, Barcelona captivates visitors with its unique blend of historic charm and contemporary energy."
        ]));
        let cities = Arc::new(StringArray::from(vec!["Paris", "London", "Barcelona"]));
        let emoji_strings = Arc::new(StringArray::from(vec![
            "🗼🥖🍷🎨🚲🏰🌟",
            "🇬🇧🚕🏰🎡🎩☕",
            "🌞🎨⚽🍴🎉🌊",
        ]));
        let chinese_names = Arc::new(StringArray::from(vec!["巴黎", "倫敦", "巴塞隆納"]));
        let populations = Arc::new(Int64Array::from(vec![321, 987654, 2]));

        let schema = Schema::new(vec![
            int64_field("a"),
            utf8_field("country"),
            utf8_field("description"),
            utf8_field("city_name"),
            utf8_field("emojis"),
            utf8_field("chinese name"),
            int64_field("pop_count"),
        ]);

        RecordBatch::try_new(
            Arc::new(schema),
            vec![
                ids,
                countries,
                descriptions,
                cities,
                emoji_strings,
                chinese_names,
                populations,
            ],
        )
        .unwrap()
    }

    /// Renders the sample batch with the given constraints and compares the
    /// output against the insta snapshot named `test_name`.
    fn assert_formatting(
        max_width: usize,
        max_row_height: usize,
        min_compacted_col_width: usize,
        test_name: &str,
    ) -> Result<(), ArrowError> {
        let sample = create_sample_batch();
        let formatted = pretty_format_compact_batch(
            &sample,
            max_width,
            max_row_height,
            min_compacted_col_width,
        )?;
        // Keep the binding named `result`: the snapshot macro records the
        // expression it is given.
        let result = formatted.to_string();

        insta_settings().bind(|| assert_snapshot!(test_name, result));

        Ok(())
    }

    // All constraints set to 0, i.e. disabled.
    #[test]
    fn test_pretty_format_no_constraints() -> Result<(), ArrowError> {
        assert_formatting(0, 0, 0, "pretty_format_no_constraints")
    }

    // Width 25 and row height 3, with `min_compacted_col_width` set to 0.
    #[test]
    fn test_pretty_format_table_width() -> Result<(), ArrowError> {
        assert_formatting(25, 3, 0, "pretty_format_table_width")
    }

    // All three constraints set, narrow table (width 25).
    #[test]
    fn test_pretty_format_all_constraints_narrow() -> Result<(), ArrowError> {
        assert_formatting(25, 3, 12, "pretty_format_all_constraints_narrow")
    }

    // All three constraints set, wide table (width 76).
    #[test]
    fn test_pretty_format_all_constraints_wide() -> Result<(), ArrowError> {
        assert_formatting(76, 3, 12, "pretty_format_all_constraints_wide")
    }
}