arrow_schema/
datatype_display.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::HashMap, fmt};
19
20use crate::DataType;
21
22impl fmt::Display for DataType {
23    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24        fn format_metadata(metadata: &HashMap<String, String>) -> String {
25            if metadata.is_empty() {
26                String::new()
27            } else {
28                format!(", metadata: {metadata:?}")
29            }
30        }
31
32        fn format_field(field: &crate::Field) -> String {
33            let name = field.name();
34            let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
35            let data_type = field.data_type();
36            let metadata_str = format_metadata(field.metadata());
37            format!("{name:?}: {maybe_nullable}{data_type}{metadata_str}")
38        }
39
40        // A lot of these can still be improved a lot.
41        // _Some_ of these can be parsed with `FromStr`, but not all (YET!).
42        // The goal is that the formatting should always be
43        // * Terse and teadable
44        // * Reversible (contain all necessary information to reverse it perfectly)
45
46        match &self {
47            Self::Null => write!(f, "Null"),
48            Self::Boolean => write!(f, "Boolean"),
49            Self::Int8 => write!(f, "Int8"),
50            Self::Int16 => write!(f, "Int16"),
51            Self::Int32 => write!(f, "Int32"),
52            Self::Int64 => write!(f, "Int64"),
53            Self::UInt8 => write!(f, "UInt8"),
54            Self::UInt16 => write!(f, "UInt16"),
55            Self::UInt32 => write!(f, "UInt32"),
56            Self::UInt64 => write!(f, "UInt64"),
57            Self::Float16 => write!(f, "Float16"),
58            Self::Float32 => write!(f, "Float32"),
59            Self::Float64 => write!(f, "Float64"),
60            Self::Timestamp(time_unit, timezone) => {
61                if let Some(timezone) = timezone {
62                    write!(f, "Timestamp({time_unit}, {timezone:?})")
63                } else {
64                    write!(f, "Timestamp({time_unit})")
65                }
66            }
67            Self::Date32 => write!(f, "Date32"),
68            Self::Date64 => write!(f, "Date64"),
69            Self::Time32(time_unit) => write!(f, "Time32({time_unit})"),
70            Self::Time64(time_unit) => write!(f, "Time64({time_unit})"),
71            Self::Duration(time_unit) => write!(f, "Duration({time_unit})"),
72            Self::Interval(interval_unit) => write!(f, "Interval({interval_unit:?})"),
73            Self::Binary => write!(f, "Binary"),
74            Self::FixedSizeBinary(bytes_per_value) => {
75                write!(f, "FixedSizeBinary({bytes_per_value:?})")
76            }
77            Self::LargeBinary => write!(f, "LargeBinary"),
78            Self::BinaryView => write!(f, "BinaryView"),
79            Self::Utf8 => write!(f, "Utf8"),
80            Self::LargeUtf8 => write!(f, "LargeUtf8"),
81            Self::Utf8View => write!(f, "Utf8View"),
82            Self::List(field)
83            | Self::LargeList(field)
84            | Self::ListView(field)
85            | Self::LargeListView(field) => {
86                let type_name = if matches!(self, Self::List(_)) {
87                    "List"
88                } else if matches!(self, Self::ListView(_)) {
89                    "ListView"
90                } else if matches!(self, Self::LargeList(_)) {
91                    "LargeList"
92                } else {
93                    "LargeListView"
94                };
95
96                let name = field.name();
97                let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
98                let data_type = field.data_type();
99                let field_name_str = if name == "item" {
100                    String::default()
101                } else {
102                    format!(", field: '{name}'")
103                };
104                let metadata_str = format_metadata(field.metadata());
105
106                // e.g. `LargeList(nullable Uint32)
107                write!(
108                    f,
109                    "{type_name}({maybe_nullable}{data_type}{field_name_str}{metadata_str})"
110                )
111            }
112            Self::FixedSizeList(field, size) => {
113                let name = field.name();
114                let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
115                let data_type = field.data_type();
116                let field_name_str = if name == "item" {
117                    String::default()
118                } else {
119                    format!(", field: '{name}'")
120                };
121                let metadata_str = format_metadata(field.metadata());
122
123                write!(
124                    f,
125                    "FixedSizeList({size} x {maybe_nullable}{data_type}{field_name_str}{metadata_str})",
126                )
127            }
128            Self::Struct(fields) => {
129                write!(f, "Struct(")?;
130                if !fields.is_empty() {
131                    let fields_str = fields
132                        .iter()
133                        .map(|field| format_field(field))
134                        .collect::<Vec<_>>()
135                        .join(", ");
136                    write!(f, "{fields_str}")?;
137                }
138                write!(f, ")")?;
139                Ok(())
140            }
141            Self::Union(union_fields, union_mode) => {
142                write!(f, "Union({union_mode:?}, ")?;
143                if !union_fields.is_empty() {
144                    let fields_str = union_fields
145                        .iter()
146                        .map(|v| {
147                            let type_id = v.0;
148                            let field_str = format_field(v.1);
149                            format!("{type_id:?}: ({field_str})")
150                        })
151                        .collect::<Vec<_>>()
152                        .join(", ");
153                    write!(f, "{fields_str}")?;
154                }
155                write!(f, ")")?;
156                Ok(())
157            }
158            Self::Dictionary(data_type, data_type1) => {
159                write!(f, "Dictionary({data_type}, {data_type1})")
160            }
161            Self::Decimal32(precision, scale) => write!(f, "Decimal32({precision}, {scale})"),
162            Self::Decimal64(precision, scale) => write!(f, "Decimal64({precision}, {scale})"),
163            Self::Decimal128(precision, scale) => write!(f, "Decimal128({precision}, {scale})"),
164            Self::Decimal256(precision, scale) => write!(f, "Decimal256({precision}, {scale})"),
165            Self::Map(field, sorted) => {
166                write!(f, "Map(")?;
167                let map_field_str = format_field(field);
168                let keys_are_sorted = if *sorted { "sorted" } else { "unsorted" };
169
170                write!(f, "{map_field_str}, {keys_are_sorted})")?;
171                Ok(())
172            }
173            Self::RunEndEncoded(run_ends_field, values_field) => {
174                write!(f, "RunEndEncoded(")?;
175                let run_ends_str = format_field(run_ends_field);
176                let values_str = format_field(values_field);
177
178                write!(f, "{run_ends_str}, {values_str})")?;
179                Ok(())
180            }
181        }
182    }
183}
184
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use crate::{Field, IntervalUnit, TimeUnit, UnionMode};

    /// Returns `field` with its metadata set to the single entry `key` -> `value`.
    fn with_single_metadata(mut field: Field, key: &str, value: &str) -> Field {
        field.set_metadata(HashMap::from([(key.to_string(), value.to_string())]));
        field
    }

    /// Builds the non-nullable `"entries"` struct field used by the map tests:
    /// a non-nullable `"key": Utf8` and a nullable `"value": Int32`.
    fn map_entries_field() -> Field {
        let key_value = vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Int32, true),
        ];
        Field::new("entries", DataType::Struct(key_value.into()), false)
    }

    #[test]
    fn test_display_list() {
        let item = Field::new_list_field(DataType::Int32, true);
        let displayed = DataType::List(Arc::new(item)).to_string();
        assert_eq!(displayed, "List(nullable Int32)");
    }

    #[test]
    fn test_display_list_view() {
        let item = Field::new("item", DataType::Int32, true);
        let displayed = DataType::ListView(Arc::new(item)).to_string();
        assert_eq!(displayed, "ListView(nullable Int32)");
    }

    #[test]
    fn test_display_list_with_named_field() {
        let item = Field::new("foo", DataType::UInt64, false);
        let displayed = DataType::List(Arc::new(item)).to_string();
        assert_eq!(displayed, "List(UInt64, field: 'foo')");
    }

    #[test]
    fn test_display_list_view_with_named_field() {
        let item = Field::new("bar", DataType::UInt64, false);
        let displayed = DataType::ListView(Arc::new(item)).to_string();
        assert_eq!(displayed, "ListView(UInt64, field: 'bar')");
    }

    #[test]
    fn test_display_nested_list() {
        let inner = DataType::List(Arc::new(Field::new_list_field(DataType::UInt64, false)));
        let outer = DataType::List(Arc::new(Field::new_list_field(inner, false)));
        assert_eq!(outer.to_string(), "List(List(UInt64))");
    }

    #[test]
    fn test_display_nested_list_view() {
        let inner = DataType::ListView(Arc::new(Field::new_list_field(DataType::UInt64, false)));
        let outer = DataType::ListView(Arc::new(Field::new_list_field(inner, false)));
        assert_eq!(outer.to_string(), "ListView(ListView(UInt64))");
    }

    #[test]
    fn test_display_list_with_metadata() {
        let item = with_single_metadata(
            Field::new_list_field(DataType::Int32, true),
            "foo1",
            "value1",
        );
        let displayed = DataType::List(Arc::new(item)).to_string();
        assert_eq!(
            displayed,
            "List(nullable Int32, metadata: {\"foo1\": \"value1\"})"
        );
    }

    #[test]
    fn test_display_list_view_with_metadata() {
        let item = with_single_metadata(
            Field::new_list_field(DataType::Int32, true),
            "foo2",
            "value2",
        );
        let displayed = DataType::ListView(Arc::new(item)).to_string();
        assert_eq!(
            displayed,
            "ListView(nullable Int32, metadata: {\"foo2\": \"value2\"})"
        );
    }

    #[test]
    fn test_display_large_list() {
        // Default child field.
        let plain = DataType::LargeList(Arc::new(Field::new_list_field(DataType::Int32, true)));
        assert_eq!(plain.to_string(), "LargeList(nullable Int32)");

        // Custom child field name.
        let named = DataType::LargeList(Arc::new(Field::new("bar", DataType::UInt64, false)));
        assert_eq!(named.to_string(), "LargeList(UInt64, field: 'bar')");

        // Child field carrying metadata.
        let item = with_single_metadata(
            Field::new_list_field(DataType::Int32, true),
            "key1",
            "value1",
        );
        let tagged = DataType::LargeList(Arc::new(item));
        assert_eq!(
            tagged.to_string(),
            "LargeList(nullable Int32, metadata: {\"key1\": \"value1\"})"
        );
    }

    #[test]
    fn test_display_large_list_view() {
        // Default child field.
        let plain = DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true)));
        assert_eq!(plain.to_string(), "LargeListView(nullable Int32)");

        // Custom child field name.
        let named = DataType::LargeListView(Arc::new(Field::new("bar", DataType::UInt64, false)));
        assert_eq!(named.to_string(), "LargeListView(UInt64, field: 'bar')");

        // Child field carrying metadata.
        let item = with_single_metadata(
            Field::new_list_field(DataType::Int32, true),
            "key1",
            "value1",
        );
        let tagged = DataType::LargeListView(Arc::new(item));
        assert_eq!(
            tagged.to_string(),
            "LargeListView(nullable Int32, metadata: {\"key1\": \"value1\"})"
        );
    }

    #[test]
    fn test_display_fixed_size_list() {
        // Default child field.
        let plain =
            DataType::FixedSizeList(Arc::new(Field::new_list_field(DataType::Int32, true)), 5);
        assert_eq!(plain.to_string(), "FixedSizeList(5 x nullable Int32)");

        // Custom child field name.
        let named =
            DataType::FixedSizeList(Arc::new(Field::new("baz", DataType::UInt64, false)), 3);
        assert_eq!(named.to_string(), "FixedSizeList(3 x UInt64, field: 'baz')");

        // Child field carrying metadata.
        let item = with_single_metadata(
            Field::new_list_field(DataType::Int32, true),
            "key2",
            "value2",
        );
        let tagged = DataType::FixedSizeList(Arc::new(item), 4);
        assert_eq!(
            tagged.to_string(),
            "FixedSizeList(4 x nullable Int32, metadata: {\"key2\": \"value2\"})"
        );
    }

    #[test]
    fn test_display_struct() {
        let plain_fields = vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Utf8, true),
        ];
        let plain = DataType::Struct(plain_fields.into());
        assert_eq!(
            plain.to_string(),
            "Struct(\"a\": Int32, \"b\": nullable Utf8)"
        );

        // Second member carries metadata.
        let tagged_fields = vec![
            Field::new("a", DataType::Int32, false),
            with_single_metadata(Field::new("b", DataType::Utf8, true), "key", "value"),
        ];
        let tagged = DataType::Struct(tagged_fields.into());
        assert_eq!(
            tagged.to_string(),
            "Struct(\"a\": Int32, \"b\": nullable Utf8, metadata: {\"key\": \"value\"})"
        );
    }

    #[test]
    fn test_display_union() {
        // Union fields assembled by zipping type ids with their fields.
        let members = vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Utf8, true),
        ];
        let ids = vec![0, 1];
        let plain = DataType::Union(
            ids.into_iter()
                .zip(members.into_iter().map(Arc::new))
                .collect(),
            UnionMode::Sparse,
        );
        assert_eq!(
            plain.to_string(),
            "Union(Sparse, 0: (\"a\": Int32), 1: (\"b\": nullable Utf8))"
        );

        // Union fields assembled from explicit (type id, field) pairs, with metadata
        // on the second member.
        let tagged_b = with_single_metadata(Field::new("b", DataType::Utf8, true), "key", "value");
        let pairs = vec![
            (0, Arc::new(Field::new("a", DataType::Int32, false))),
            (1, Arc::new(tagged_b)),
        ];
        let tagged = DataType::Union(pairs.into_iter().collect(), UnionMode::Sparse);
        assert_eq!(
            tagged.to_string(),
            "Union(Sparse, 0: (\"a\": Int32), 1: (\"b\": nullable Utf8, metadata: {\"key\": \"value\"}))"
        );
    }

    #[test]
    fn test_display_map() {
        let plain = DataType::Map(Arc::new(map_entries_field()), true);
        assert_eq!(
            plain.to_string(),
            "Map(\"entries\": Struct(\"key\": Utf8, \"value\": nullable Int32), sorted)"
        );

        // Entries field carrying metadata.
        let tagged_entries = with_single_metadata(map_entries_field(), "key", "value");
        let tagged = DataType::Map(Arc::new(tagged_entries), true);
        assert_eq!(
            tagged.to_string(),
            "Map(\"entries\": Struct(\"key\": Utf8, \"value\": nullable Int32), metadata: {\"key\": \"value\"}, sorted)"
        );
    }

    #[test]
    fn test_display_run_end_encoded() {
        let values = Arc::new(Field::new("values", DataType::Int32, true));

        let run_ends = Arc::new(Field::new("run_ends", DataType::UInt32, false));
        let plain = DataType::RunEndEncoded(run_ends, Arc::clone(&values));
        assert_eq!(
            plain.to_string(),
            "RunEndEncoded(\"run_ends\": UInt32, \"values\": nullable Int32)"
        );

        // Run-ends field carrying metadata.
        let tagged_run_ends = with_single_metadata(
            Field::new("run_ends", DataType::UInt32, false),
            "key",
            "value",
        );
        let tagged = DataType::RunEndEncoded(Arc::new(tagged_run_ends), values);
        assert_eq!(
            tagged.to_string(),
            "RunEndEncoded(\"run_ends\": UInt32, metadata: {\"key\": \"value\"}, \"values\": nullable Int32)"
        );
    }

    #[test]
    fn test_display_dictionary() {
        let simple = DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
        assert_eq!(simple.to_string(), "Dictionary(Int8, Utf8)");

        // Nested value type.
        let struct_values = DataType::Struct(
            vec![
                Field::new("a", DataType::Int32, false),
                Field::new("b", DataType::Utf8, true),
            ]
            .into(),
        );
        let nested = DataType::Dictionary(Box::new(DataType::Int16), Box::new(struct_values));
        assert_eq!(
            nested.to_string(),
            "Dictionary(Int16, Struct(\"a\": Int32, \"b\": nullable Utf8))"
        );
    }

    #[test]
    fn test_display_interval() {
        assert_eq!(
            DataType::Interval(IntervalUnit::YearMonth).to_string(),
            "Interval(YearMonth)"
        );
        assert_eq!(
            DataType::Interval(IntervalUnit::DayTime).to_string(),
            "Interval(DayTime)"
        );
        assert_eq!(
            DataType::Interval(IntervalUnit::MonthDayNano).to_string(),
            "Interval(MonthDayNano)"
        );
    }

    #[test]
    fn test_display_timestamp() {
        let naive = DataType::Timestamp(TimeUnit::Microsecond, None);
        assert_eq!(naive.to_string(), "Timestamp(µs)");

        let zoned = DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("UTC")));
        assert_eq!(zoned.to_string(), "Timestamp(ns, \"UTC\")");
    }
}