arrow_schema/
datatype_display.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::DataType;
19use std::fmt::Display;
20use std::{collections::HashMap, fmt};
21
22impl Display for DataType {
23    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24        fn format_metadata(metadata: &HashMap<String, String>) -> String {
25            format!("{}", FormatMetadata(metadata))
26        }
27
28        fn format_nullability(field: &crate::Field) -> &str {
29            if field.is_nullable() { "" } else { "non-null " }
30        }
31
32        fn format_field(field: &crate::Field) -> String {
33            let name = field.name();
34            let maybe_nullable = format_nullability(field);
35            let data_type = field.data_type();
36            let metadata_str = format_metadata(field.metadata());
37            format!("{name:?}: {maybe_nullable}{data_type}{metadata_str}")
38        }
39
40        // A lot of these can still be improved a lot.
41        // _Some_ of these can be parsed with `FromStr`, but not all (YET!).
42        // The goal is that the formatting should always be
43        // * Terse and teadable
44        // * Reversible (contain all necessary information to reverse it perfectly)
45
46        match &self {
47            Self::Null => write!(f, "Null"),
48            Self::Boolean => write!(f, "Boolean"),
49            Self::Int8 => write!(f, "Int8"),
50            Self::Int16 => write!(f, "Int16"),
51            Self::Int32 => write!(f, "Int32"),
52            Self::Int64 => write!(f, "Int64"),
53            Self::UInt8 => write!(f, "UInt8"),
54            Self::UInt16 => write!(f, "UInt16"),
55            Self::UInt32 => write!(f, "UInt32"),
56            Self::UInt64 => write!(f, "UInt64"),
57            Self::Float16 => write!(f, "Float16"),
58            Self::Float32 => write!(f, "Float32"),
59            Self::Float64 => write!(f, "Float64"),
60            Self::Timestamp(time_unit, timezone) => {
61                if let Some(timezone) = timezone {
62                    write!(f, "Timestamp({time_unit}, {timezone:?})")
63                } else {
64                    write!(f, "Timestamp({time_unit})")
65                }
66            }
67            Self::Date32 => write!(f, "Date32"),
68            Self::Date64 => write!(f, "Date64"),
69            Self::Time32(time_unit) => write!(f, "Time32({time_unit})"),
70            Self::Time64(time_unit) => write!(f, "Time64({time_unit})"),
71            Self::Duration(time_unit) => write!(f, "Duration({time_unit})"),
72            Self::Interval(interval_unit) => write!(f, "Interval({interval_unit:?})"),
73            Self::Binary => write!(f, "Binary"),
74            Self::FixedSizeBinary(bytes_per_value) => {
75                write!(f, "FixedSizeBinary({bytes_per_value:?})")
76            }
77            Self::LargeBinary => write!(f, "LargeBinary"),
78            Self::BinaryView => write!(f, "BinaryView"),
79            Self::Utf8 => write!(f, "Utf8"),
80            Self::LargeUtf8 => write!(f, "LargeUtf8"),
81            Self::Utf8View => write!(f, "Utf8View"),
82            Self::List(field)
83            | Self::LargeList(field)
84            | Self::ListView(field)
85            | Self::LargeListView(field) => {
86                let type_name = if matches!(self, Self::List(_)) {
87                    "List"
88                } else if matches!(self, Self::ListView(_)) {
89                    "ListView"
90                } else if matches!(self, Self::LargeList(_)) {
91                    "LargeList"
92                } else {
93                    "LargeListView"
94                };
95
96                let name = field.name();
97                let maybe_nullable = format_nullability(field);
98                let data_type = field.data_type();
99                let field_name_str = if name == "item" {
100                    String::default()
101                } else {
102                    format!(", field: '{name}'")
103                };
104                let metadata_str = format_metadata(field.metadata());
105
106                // e.g. `LargeList(non-null Uint32)
107                write!(
108                    f,
109                    "{type_name}({maybe_nullable}{data_type}{field_name_str}{metadata_str})"
110                )
111            }
112            Self::FixedSizeList(field, size) => {
113                let name = field.name();
114                let maybe_nullable = format_nullability(field);
115                let data_type = field.data_type();
116                let field_name_str = if name == "item" {
117                    String::default()
118                } else {
119                    format!(", field: '{name}'")
120                };
121                let metadata_str = format_metadata(field.metadata());
122
123                write!(
124                    f,
125                    "FixedSizeList({size} x {maybe_nullable}{data_type}{field_name_str}{metadata_str})",
126                )
127            }
128            Self::Struct(fields) => {
129                write!(f, "Struct(")?;
130                if !fields.is_empty() {
131                    let fields_str = fields
132                        .iter()
133                        .map(|field| format_field(field))
134                        .collect::<Vec<_>>()
135                        .join(", ");
136                    write!(f, "{fields_str}")?;
137                }
138                write!(f, ")")?;
139                Ok(())
140            }
141            Self::Union(union_fields, union_mode) => {
142                write!(f, "Union({union_mode:?}")?;
143                if !union_fields.is_empty() {
144                    write!(f, ", ")?;
145                    let fields_str = union_fields
146                        .iter()
147                        .map(|v| {
148                            let type_id = v.0;
149                            let field_str = format_field(v.1);
150                            format!("{type_id:?}: ({field_str})")
151                        })
152                        .collect::<Vec<_>>()
153                        .join(", ");
154                    write!(f, "{fields_str}")?;
155                }
156                write!(f, ")")?;
157                Ok(())
158            }
159            Self::Dictionary(data_type, data_type1) => {
160                write!(f, "Dictionary({data_type}, {data_type1})")
161            }
162            Self::Decimal32(precision, scale) => write!(f, "Decimal32({precision}, {scale})"),
163            Self::Decimal64(precision, scale) => write!(f, "Decimal64({precision}, {scale})"),
164            Self::Decimal128(precision, scale) => write!(f, "Decimal128({precision}, {scale})"),
165            Self::Decimal256(precision, scale) => write!(f, "Decimal256({precision}, {scale})"),
166            Self::Map(field, sorted) => {
167                write!(f, "Map(")?;
168                let map_field_str = format_field(field);
169                let keys_are_sorted = if *sorted { "sorted" } else { "unsorted" };
170
171                write!(f, "{map_field_str}, {keys_are_sorted})")?;
172                Ok(())
173            }
174            Self::RunEndEncoded(run_ends_field, values_field) => {
175                write!(f, "RunEndEncoded(")?;
176                let run_ends_str = format_field(run_ends_field);
177                let values_str = format_field(values_field);
178
179                write!(f, "{run_ends_str}, {values_str})")?;
180                Ok(())
181            }
182        }
183    }
184}
185
/// Display adapter that renders a metadata map as a `, metadata: {..}` suffix.
///
/// An empty map renders as nothing at all. Otherwise the entries are printed
/// in ascending key order, so the output does not depend on `HashMap`
/// iteration order.
struct FormatMetadata<'a>(&'a HashMap<String, String>);

impl fmt::Display for FormatMetadata<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let FormatMetadata(map) = self;
        if map.is_empty() {
            return Ok(());
        }

        // Keys are unique, so an unstable sort yields the same order as a stable one.
        let mut sorted_entries: Vec<_> = map.iter().collect();
        sorted_entries.sort_unstable_by_key(|entry| entry.0);

        f.write_str(", metadata: ")?;
        f.debug_map().entries(sorted_entries).finish()
    }
}
202
#[cfg(test)]
mod tests {

    use std::sync::Arc;

    use crate::{Field, IntervalUnit, TimeUnit, UnionMode};

    use super::*;

    /// Asserts that `data_type` renders through `Display` exactly as `expected`.
    #[track_caller]
    fn assert_display(data_type: &DataType, expected: &str) {
        assert_eq!(data_type.to_string(), expected);
    }

    /// Returns `field` with its metadata replaced by the given key/value pairs.
    fn with_metadata(mut field: Field, entries: &[(&str, &str)]) -> Field {
        let metadata: HashMap<String, String> = entries
            .iter()
            .map(|(key, value)| (key.to_string(), value.to_string()))
            .collect();
        field.set_metadata(metadata);
        field
    }

    /// Nullable `Int32` list element carrying a single metadata entry.
    fn int32_item_with_metadata(key: &str, value: &str) -> Field {
        with_metadata(
            Field::new_list_field(DataType::Int32, true),
            &[(key, value)],
        )
    }

    /// Non-nullable `Int32` field named `a`.
    fn field_a() -> Field {
        Field::new("a", DataType::Int32, false)
    }

    /// Nullable `Utf8` field named `b`.
    fn field_b() -> Field {
        Field::new("b", DataType::Utf8, true)
    }

    /// Non-nullable `entries` struct field with `key`/`value` children, as used by `Map`.
    fn map_entries_field() -> Field {
        let entry_struct = DataType::Struct(
            vec![
                Field::new("key", DataType::Utf8, false),
                Field::new("value", DataType::Int32, true),
            ]
            .into(),
        );
        Field::new("entries", entry_struct, false)
    }

    #[test]
    fn test_display_list() {
        let item = Field::new_list_field(DataType::Int32, true);
        assert_display(&DataType::List(Arc::new(item)), "List(Int32)");
    }

    #[test]
    fn test_display_list_view() {
        let item = Field::new("item", DataType::Int32, true);
        assert_display(&DataType::ListView(Arc::new(item)), "ListView(Int32)");
    }

    #[test]
    fn test_display_list_with_named_field() {
        let element = Field::new("foo", DataType::UInt64, false);
        assert_display(
            &DataType::List(Arc::new(element)),
            "List(non-null UInt64, field: 'foo')",
        );
    }

    #[test]
    fn test_display_list_view_with_named_field() {
        let element = Field::new("bar", DataType::UInt64, false);
        assert_display(
            &DataType::ListView(Arc::new(element)),
            "ListView(non-null UInt64, field: 'bar')",
        );
    }

    #[test]
    fn test_display_nested_list() {
        let inner = DataType::List(Arc::new(Field::new_list_field(DataType::UInt64, false)));
        let outer = DataType::List(Arc::new(Field::new_list_field(inner, false)));
        assert_display(&outer, "List(non-null List(non-null UInt64))");
    }

    #[test]
    fn test_display_nested_list_view() {
        let inner = DataType::ListView(Arc::new(Field::new_list_field(DataType::UInt64, false)));
        let outer = DataType::ListView(Arc::new(Field::new_list_field(inner, false)));
        assert_display(&outer, "ListView(non-null ListView(non-null UInt64))");
    }

    #[test]
    fn test_display_list_with_metadata() {
        let item = int32_item_with_metadata("foo1", "value1");
        assert_display(
            &DataType::List(Arc::new(item)),
            "List(Int32, metadata: {\"foo1\": \"value1\"})",
        );
    }

    #[test]
    fn test_display_list_view_with_metadata() {
        let item = int32_item_with_metadata("foo2", "value2");
        assert_display(
            &DataType::ListView(Arc::new(item)),
            "ListView(Int32, metadata: {\"foo2\": \"value2\"})",
        );
    }

    #[test]
    fn test_display_large_list() {
        // Default element field.
        let plain = DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true)));
        assert_display(&plain, "LargeList(Int32)");

        // Custom element name.
        let named = DataType::LargeList(Arc::new(Field::new("bar", DataType::UInt64, false)));
        assert_display(&named, "LargeList(non-null UInt64, field: 'bar')");

        // Element metadata.
        let annotated = DataType::LargeList(Arc::new(int32_item_with_metadata("key1", "value1")));
        assert_display(
            &annotated,
            "LargeList(Int32, metadata: {\"key1\": \"value1\"})",
        );
    }

    #[test]
    fn test_display_large_list_view() {
        // Default element field.
        let plain = DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true)));
        assert_display(&plain, "LargeListView(Int32)");

        // Custom element name.
        let named = DataType::LargeListView(Arc::new(Field::new("bar", DataType::UInt64, false)));
        assert_display(&named, "LargeListView(non-null UInt64, field: 'bar')");

        // Element metadata.
        let annotated =
            DataType::LargeListView(Arc::new(int32_item_with_metadata("key1", "value1")));
        assert_display(
            &annotated,
            "LargeListView(Int32, metadata: {\"key1\": \"value1\"})",
        );
    }

    #[test]
    fn test_display_fixed_size_list() {
        // Default element field.
        let plain =
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 5);
        assert_display(&plain, "FixedSizeList(5 x Int32)");

        // Custom element name.
        let named =
            DataType::FixedSizeList(Arc::new(Field::new("baz", DataType::UInt64, false)), 3);
        assert_display(&named, "FixedSizeList(3 x non-null UInt64, field: 'baz')");

        // Element metadata.
        let annotated =
            DataType::FixedSizeList(Arc::new(int32_item_with_metadata("key2", "value2")), 4);
        assert_display(
            &annotated,
            "FixedSizeList(4 x Int32, metadata: {\"key2\": \"value2\"})",
        );
    }

    #[test]
    fn test_display_struct() {
        let plain = DataType::Struct(vec![field_a(), field_b()].into());
        assert_display(&plain, "Struct(\"a\": non-null Int32, \"b\": Utf8)");

        // Two metadata entries on the second field.
        let annotated_b = with_metadata(field_b(), &[("key", "value"), ("key2", "value2")]);
        let annotated = DataType::Struct(vec![field_a(), annotated_b].into());
        assert_display(
            &annotated,
            "Struct(\"a\": non-null Int32, \"b\": Utf8, metadata: {\"key\": \"value\", \"key2\": \"value2\"})",
        );
    }

    #[test]
    fn test_display_union() {
        let children = vec![field_a(), field_b()];
        let plain = DataType::Union(
            vec![0, 1]
                .into_iter()
                .zip(children.into_iter().map(Arc::new))
                .collect(),
            UnionMode::Sparse,
        );
        assert_display(
            &plain,
            "Union(Sparse, 0: (\"a\": non-null Int32), 1: (\"b\": Utf8))",
        );

        // Metadata on the second child.
        let annotated_b = with_metadata(field_b(), &[("key", "value")]);
        let annotated = DataType::Union(
            vec![(0, Arc::new(field_a())), (1, Arc::new(annotated_b))]
                .into_iter()
                .collect(),
            UnionMode::Sparse,
        );
        assert_display(
            &annotated,
            "Union(Sparse, 0: (\"a\": non-null Int32), 1: (\"b\": Utf8, metadata: {\"key\": \"value\"}))",
        );
    }

    #[test]
    fn test_display_map() {
        let plain = DataType::Map(Arc::new(map_entries_field()), true);
        assert_display(
            &plain,
            "Map(\"entries\": non-null Struct(\"key\": non-null Utf8, \"value\": Int32), sorted)",
        );

        // Metadata on the entries field.
        let annotated_entries = with_metadata(map_entries_field(), &[("key", "value")]);
        let annotated = DataType::Map(Arc::new(annotated_entries), true);
        assert_display(
            &annotated,
            "Map(\"entries\": non-null Struct(\"key\": non-null Utf8, \"value\": Int32), metadata: {\"key\": \"value\"}, sorted)",
        );
    }

    #[test]
    fn test_display_run_end_encoded() {
        let values = Arc::new(Field::new("values", DataType::Int32, true));

        let run_ends = Arc::new(Field::new("run_ends", DataType::UInt32, false));
        let plain = DataType::RunEndEncoded(run_ends, values.clone());
        assert_display(
            &plain,
            "RunEndEncoded(\"run_ends\": non-null UInt32, \"values\": Int32)",
        );

        // Metadata on the run-ends field.
        let annotated_run_ends = with_metadata(
            Field::new("run_ends", DataType::UInt32, false),
            &[("key", "value")],
        );
        let annotated = DataType::RunEndEncoded(Arc::new(annotated_run_ends), values);
        assert_display(
            &annotated,
            "RunEndEncoded(\"run_ends\": non-null UInt32, metadata: {\"key\": \"value\"}, \"values\": Int32)",
        );
    }

    #[test]
    fn test_display_dictionary() {
        let simple = DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
        assert_display(&simple, "Dictionary(Int8, Utf8)");

        // Nested value type.
        let struct_values = DataType::Struct(vec![field_a(), field_b()].into());
        let nested = DataType::Dictionary(Box::new(DataType::Int16), Box::new(struct_values));
        assert_display(
            &nested,
            "Dictionary(Int16, Struct(\"a\": non-null Int32, \"b\": Utf8))",
        );
    }

    #[test]
    fn test_display_interval() {
        assert_display(
            &DataType::Interval(IntervalUnit::YearMonth),
            "Interval(YearMonth)",
        );
        assert_display(
            &DataType::Interval(IntervalUnit::DayTime),
            "Interval(DayTime)",
        );
        assert_display(
            &DataType::Interval(IntervalUnit::MonthDayNano),
            "Interval(MonthDayNano)",
        );
    }

    #[test]
    fn test_display_timestamp() {
        let without_tz = DataType::Timestamp(TimeUnit::Microsecond, None);
        assert_display(&without_tz, "Timestamp(µs)");

        let with_tz = DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("UTC")));
        assert_display(&with_tz, "Timestamp(ns, \"UTC\")");
    }
}