datafusion_tracing/
preview_utils.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17//
18// This product includes software developed at Datadog (https://www.datadoghq.com/) Copyright 2025 Datadog, Inc.
19
20use comfy_table::{Cell, ContentArrangement, Table};
21use datafusion::arrow::{
22    array::RecordBatch,
23    error::ArrowError,
24    util::display::{ArrayFormatter, FormatOptions},
25};
26use std::fmt::Display;
27use unicode_width::UnicodeWidthStr;
28
/// Border preset handed to `Table::load_preset` when every column of the
/// batch is displayed.
const DEFAULT_PRESET: &str = "||--|=+||-+||++++++";
/// Border preset handed to `Table::load_preset` when at least one column was
/// dropped. It is identical to [`DEFAULT_PRESET`] except that five positions
/// hold `…` instead of a border character.
// NOTE(review): per comfy_table's preset layout those five positions should be
// the right-hand border, intersections and corners, so that the right edge
// signals the omitted columns; confirm against the comfy_table documentation.
const TRUNCATED_PRESET: &str = "|…--|=+…|-+|…+++…+…";
31
32/// Formats a `RecordBatch` as a neatly aligned ASCII table,
33/// constraining the total width to `max_width`. Columns are
34/// dynamically resized or truncated, and columns that cannot
35/// fit within the given width may be dropped.
36pub fn pretty_format_compact_batch(
37    batch: &RecordBatch,
38    max_width: usize,
39    max_row_height: usize,
40    min_compacted_col_width: usize,
41) -> Result<impl Display, ArrowError> {
42    let schema = batch.schema();
43    let total_fields = schema.fields().len();
44    let format_opts = FormatOptions::default().with_display_error(true);
45
46    // Initialize header and column formatters
47    let header: Vec<Cell> = schema
48        .fields()
49        .iter()
50        .map(|f| Cell::new(f.name()))
51        .collect();
52    let formatters: Vec<_> = batch
53        .columns()
54        .iter()
55        .map(|col| ArrayFormatter::try_new(col.as_ref(), &format_opts))
56        .collect::<Result<_, ArrowError>>()?;
57
58    // Generate the 2d array of formatted values
59    let formatted_values: Vec<Vec<Cell>> = (0..batch.num_rows())
60        .map(|row_idx| {
61            formatters
62                .iter()
63                .map(|fmt| Cell::new(fmt.value(row_idx)))
64                .collect()
65        })
66        .collect();
67
68    // Compute column widths as the maximum width of each cell for that column, header included
69    let mut column_widths = vec![0; total_fields];
70    for row in std::iter::once(&header).chain(formatted_values.iter()) {
71        for (col_idx, cell) in row.iter().enumerate() {
72            let cell_width = cell.content().width() + 3; // +3 for left separator + left padding + right padding
73            column_widths[col_idx] = column_widths[col_idx].max(cell_width);
74        }
75    }
76
77    // Count how many columns fit within the maximum table width constraint
78    let nb_displayed_columns = if max_width == 0 {
79        // no constraint: all columns fit
80        total_fields
81    } else {
82        let mut table_width = 1; // initial width of 1 for the rightmost table separator
83        let mut fit_columns = 0; // number of columns that fit within max_width: the remaining columns will be dropped
84        for width in column_widths {
85            let col_width = width.min(min_compacted_col_width).max(4); // lower bound of 4 for each column width: left separator + left padding + data + right padding
86            if table_width + col_width > max_width {
87                break;
88            }
89            table_width += col_width;
90            fit_columns += 1;
91        }
92        fit_columns
93    };
94
95    // Adjust the preset for eventual truncated columns
96    let table_preset = if nb_displayed_columns == total_fields {
97        DEFAULT_PRESET
98    } else {
99        TRUNCATED_PRESET
100    };
101
102    // Build the final adjusted table
103    let mut table = Table::new();
104    table
105        .force_no_tty()
106        .load_preset(table_preset)
107        .set_content_arrangement(ContentArrangement::Dynamic)
108        .set_header(header.into_iter().take(nb_displayed_columns))
109        .set_truncation_indicator("…");
110
111    for formatted_row in formatted_values {
112        table.add_row(formatted_row.into_iter().take(nb_displayed_columns));
113    }
114
115    // Apply row height and table width constraints if provided
116    if max_row_height > 0 {
117        for row in table.row_iter_mut() {
118            row.max_height(max_row_height);
119        }
120    }
121
122    if max_width > 0 {
123        table.set_width(max_width as u16);
124    }
125
126    Ok(table.to_string())
127}
128
#[cfg(test)]
mod tests {
    use super::pretty_format_compact_batch;
    use datafusion::arrow::array::{Int64Array, StringArray};
    use datafusion::arrow::datatypes::{DataType, Field, Schema};
    use datafusion::arrow::error::ArrowError;
    use datafusion::arrow::record_batch::RecordBatch;
    use insta::{Settings, assert_snapshot};
    use std::sync::Arc;

    /// Builds the shared fixture: three rows over seven columns mixing
    /// integers, short strings, long prose, emojis and CJK text.
    fn create_sample_batch() -> RecordBatch {
        // Column data, in schema order.
        let col_a = Arc::new(Int64Array::from(vec![1, 3, 7]));
        let col_country =
            Arc::new(StringArray::from(vec!["France", "United Kingdom", "Spain"]));
        let col_description = Arc::new(StringArray::from(vec![
            "Paris is renowned as the City of Light, celebrated for its rich history, magnificent architecture, and vibrant arts scene. The city boasts iconic landmarks such as the Eiffel Tower and the Louvre, along with charming streets, quaint cafés, and a deep cultural heritage that continues to inspire artists, writers, and travelers from around the world.",
            "London is a dynamic and cosmopolitan metropolis that seamlessly blends its storied past with modern innovation. The city offers an array of historical sites, diverse neighborhoods, and world-class museums and theaters. Its bustling markets, green parks, and ever-evolving cultural scene make London a hub of creativity, commerce, and community life.",
            "Barcelona is a lively coastal city known for its striking modernist architecture, Mediterranean beaches, and eclectic cultural offerings. From the whimsical creations of Antoni Gaudí to the vibrant street life and renowned culinary delights, Barcelona captivates visitors with its unique blend of historic charm and contemporary energy.",
        ]));
        let col_city_name =
            Arc::new(StringArray::from(vec!["Paris", "London", "Barcelona"]));
        let col_emojis = Arc::new(StringArray::from(vec![
            "🗼🥖🍷🎨🚲🏰🌟",
            "🇬🇧🚕🏰🎡🎩☕",
            "🌞🎨⚽🍴🎉🌊",
        ]));
        let col_chinese_name =
            Arc::new(StringArray::from(vec!["巴黎", "倫敦", "巴塞隆納"]));
        let col_pop_count = Arc::new(Int64Array::from(vec![321, 987654, 2]));

        // Matching non-nullable schema.
        let fields = vec![
            Field::new("a", DataType::Int64, false),
            Field::new("country", DataType::Utf8, false),
            Field::new("description", DataType::Utf8, false),
            Field::new("city_name", DataType::Utf8, false),
            Field::new("emojis", DataType::Utf8, false),
            Field::new("chinese name", DataType::Utf8, false),
            Field::new("pop_count", DataType::Int64, false),
        ];
        let schema = Arc::new(Schema::new(fields));

        RecordBatch::try_new(
            schema,
            vec![
                col_a,
                col_country,
                col_description,
                col_city_name,
                col_emojis,
                col_chinese_name,
                col_pop_count,
            ],
        )
        .unwrap()
    }

    /// Snapshot settings used by every test: snapshot names are not
    /// prefixed with the module path.
    fn insta_settings() -> Settings {
        let mut snapshot_settings = Settings::clone_current();
        snapshot_settings.set_prepend_module_to_snapshot(false);
        snapshot_settings
    }

    /// Renders the sample batch under the given constraints and compares the
    /// output with the snapshot named `test_name`.
    fn assert_formatting(
        max_width: usize,
        max_row_height: usize,
        min_compacted_col_width: usize,
        test_name: &str,
    ) -> Result<(), ArrowError> {
        let sample = create_sample_batch();
        let formatted = pretty_format_compact_batch(
            &sample,
            max_width,
            max_row_height,
            min_compacted_col_width,
        )?;
        // Keep this binding named `result`: the snapshot macro is handed the
        // expression as written.
        let result = formatted.to_string();

        insta_settings().bind(|| assert_snapshot!(test_name, result));

        Ok(())
    }

    /// All constraints disabled.
    #[test]
    fn test_pretty_format_no_constraints() -> Result<(), ArrowError> {
        assert_formatting(0, 0, 0, "pretty_format_no_constraints")
    }

    /// Narrow table, bounded row height, zero minimum compacted column width.
    #[test]
    fn test_pretty_format_table_width() -> Result<(), ArrowError> {
        assert_formatting(25, 3, 0, "pretty_format_table_width")
    }

    /// Every constraint set, narrow table.
    #[test]
    fn test_pretty_format_all_constraints_narrow() -> Result<(), ArrowError> {
        assert_formatting(25, 3, 12, "pretty_format_all_constraints_narrow")
    }

    /// Every constraint set, wide table.
    #[test]
    fn test_pretty_format_all_constraints_wide() -> Result<(), ArrowError> {
        assert_formatting(76, 3, 12, "pretty_format_all_constraints_wide")
    }
}