Skip to main content

alimentar/tui/
schema_inspector.rs

1//! Schema inspector widget for displaying Arrow schema information
2//!
3//! Displays field names, types, and nullable status in a table format.
4
5use super::{adapter::DatasetAdapter, format::truncate_string};
6
7/// Schema inspector widget for displaying dataset schema
8///
9/// Shows field names, Arrow types, and nullable status.
10///
11/// # Example
12///
13/// ```ignore
14/// let adapter = DatasetAdapter::from_dataset(&dataset)?;
15/// let inspector = SchemaInspector::new(&adapter);
16///
17/// for line in inspector.render_lines() {
18///     println!("{}", line);
19/// }
20/// ```
21#[derive(Debug, Clone)]
22pub struct SchemaInspector {
23    /// Field information: (name, type, nullable)
24    fields: Vec<FieldInfo>,
25    /// Display width
26    display_width: u16,
27}
28
/// Snapshot of one schema field as the inspector displays it.
#[derive(Clone, Debug)]
struct FieldInfo {
    /// Field name as declared in the schema.
    name: String,
    /// Human-readable label for the field's Arrow data type.
    type_name: String,
    /// Whether the schema marks the field as nullable.
    nullable: bool,
}
36
37impl SchemaInspector {
38    /// Create a new schema inspector from a dataset adapter
39    pub fn new(adapter: &DatasetAdapter) -> Self {
40        Self::with_width(adapter, 80)
41    }
42
43    /// Create a new schema inspector with specific width
44    pub fn with_width(adapter: &DatasetAdapter, width: u16) -> Self {
45        let schema = adapter.schema();
46        let fields: Vec<FieldInfo> = schema
47            .fields()
48            .iter()
49            .map(|f| FieldInfo {
50                name: f.name().clone(),
51                type_name: format_type_name(f.data_type()),
52                nullable: f.is_nullable(),
53            })
54            .collect();
55
56        Self {
57            fields,
58            display_width: width,
59        }
60    }
61
62    /// Get the number of fields
63    pub fn field_count(&self) -> usize {
64        self.fields.len()
65    }
66
67    /// Check if schema is empty
68    pub fn is_empty(&self) -> bool {
69        self.fields.is_empty()
70    }
71
72    /// Get the display width
73    pub fn display_width(&self) -> u16 {
74        self.display_width
75    }
76
77    /// Render the schema as lines
78    pub fn render_lines(&self) -> Vec<String> {
79        let mut lines = Vec::with_capacity(self.fields.len() + 3);
80
81        // Calculate column widths
82        let name_width = self
83            .fields
84            .iter()
85            .map(|f| f.name.len())
86            .max()
87            .unwrap_or(5)
88            .max(5);
89        let type_width = self
90            .fields
91            .iter()
92            .map(|f| f.type_name.len())
93            .max()
94            .unwrap_or(4)
95            .max(4);
96
97        // Header
98        let header = format!(
99            "{:<name_width$}  {:<type_width$}  Nullable",
100            "Field",
101            "Type",
102            name_width = name_width,
103            type_width = type_width
104        );
105        lines.push(header);
106
107        // Separator
108        let sep = format!(
109            "{:-<name_width$}  {:-<type_width$}  --------",
110            "",
111            "",
112            name_width = name_width,
113            type_width = type_width
114        );
115        lines.push(sep);
116
117        // Fields
118        for field in &self.fields {
119            let nullable_str = if field.nullable { "Yes" } else { "No" };
120            let line = format!(
121                "{:<name_width$}  {:<type_width$}  {}",
122                truncate_string(&field.name, name_width),
123                truncate_string(&field.type_name, type_width),
124                nullable_str,
125                name_width = name_width,
126                type_width = type_width
127            );
128            lines.push(line);
129        }
130
131        lines
132    }
133
134    /// Get field info by index
135    pub fn field(&self, index: usize) -> Option<(&str, &str, bool)> {
136        self.fields
137            .get(index)
138            .map(|f| (f.name.as_str(), f.type_name.as_str(), f.nullable))
139    }
140
141    /// Get all field names
142    pub fn field_names(&self) -> Vec<&str> {
143        self.fields.iter().map(|f| f.name.as_str()).collect()
144    }
145
146    /// Get all type names
147    pub fn type_names(&self) -> Vec<&str> {
148        self.fields.iter().map(|f| f.type_name.as_str()).collect()
149    }
150}
151
152/// Format Arrow data type as human-readable string
153fn format_type_name(dt: &arrow::datatypes::DataType) -> String {
154    use arrow::datatypes::DataType;
155
156    match dt {
157        DataType::Null => "Null".to_string(),
158        DataType::Boolean => "Boolean".to_string(),
159        DataType::Int8 => "Int8".to_string(),
160        DataType::Int16 => "Int16".to_string(),
161        DataType::Int32 => "Int32".to_string(),
162        DataType::Int64 => "Int64".to_string(),
163        DataType::UInt8 => "UInt8".to_string(),
164        DataType::UInt16 => "UInt16".to_string(),
165        DataType::UInt32 => "UInt32".to_string(),
166        DataType::UInt64 => "UInt64".to_string(),
167        DataType::Float16 => "Float16".to_string(),
168        DataType::Float32 => "Float32".to_string(),
169        DataType::Float64 => "Float64".to_string(),
170        DataType::Utf8 => "Utf8".to_string(),
171        DataType::LargeUtf8 => "LargeUtf8".to_string(),
172        DataType::Binary => "Binary".to_string(),
173        DataType::LargeBinary => "LargeBinary".to_string(),
174        DataType::Date32 => "Date32".to_string(),
175        DataType::Date64 => "Date64".to_string(),
176        DataType::Timestamp(unit, tz) => {
177            let unit_str = match unit {
178                arrow::datatypes::TimeUnit::Second => "s",
179                arrow::datatypes::TimeUnit::Millisecond => "ms",
180                arrow::datatypes::TimeUnit::Microsecond => "us",
181                arrow::datatypes::TimeUnit::Nanosecond => "ns",
182            };
183            match tz {
184                Some(tz) => format!("Timestamp[{unit_str}, {tz}]"),
185                None => format!("Timestamp[{unit_str}]"),
186            }
187        }
188        DataType::List(inner) => format!("List<{}>", format_type_name(inner.data_type())),
189        DataType::LargeList(inner) => {
190            format!("LargeList<{}>", format_type_name(inner.data_type()))
191        }
192        DataType::Struct(fields) => {
193            format!("Struct({})", fields.len())
194        }
195        DataType::Dictionary(key, value) => {
196            format!(
197                "Dict<{}, {}>",
198                format_type_name(key),
199                format_type_name(value)
200            )
201        }
202        DataType::Map(field, _) => {
203            format!("Map<{}>", format_type_name(field.data_type()))
204        }
205        _ => format!("{dt:?}"),
206    }
207}
208
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::datatypes::{DataType, Field, Schema, TimeUnit};

    use super::*;

    /// Adapter with no record batches and a four-column schema.
    ///
    /// Only `score` is nullable; the longest name (`timestamp`) is 9 bytes and
    /// the longest type label (`Float32`) is 7 bytes, which fixes the column
    /// widths asserted in the rendering tests.
    fn create_test_adapter() -> DatasetAdapter {
        let schema = Arc::new(Schema::new(vec![
            Field::new("id", DataType::Utf8, false),
            Field::new("value", DataType::Int32, false),
            Field::new("score", DataType::Float32, true),
            Field::new("timestamp", DataType::Int64, false),
        ]));

        DatasetAdapter::from_batches(vec![], schema).unwrap()
    }

    /// Default-width inspector over the four-column test schema.
    fn create_test_inspector() -> SchemaInspector {
        SchemaInspector::new(&create_test_adapter())
    }

    /// Generates one test per scalar `DataType` variant, asserting that the
    /// formatted label is exactly the variant's identifier.
    macro_rules! scalar_name_tests {
        ($($test_name:ident => $variant:ident),+ $(,)?) => {
            $(
                #[test]
                fn $test_name() {
                    assert_eq!(
                        format_type_name(&DataType::$variant),
                        stringify!($variant)
                    );
                }
            )+
        };
    }

    scalar_name_tests! {
        f_format_type_null => Null,
        f_format_type_boolean => Boolean,
        f_format_type_int8 => Int8,
        f_format_type_int16 => Int16,
        f_format_type_int32 => Int32,
        f_format_type_int64 => Int64,
        f_format_type_uint8 => UInt8,
        f_format_type_uint16 => UInt16,
        f_format_type_uint32 => UInt32,
        f_format_type_uint64 => UInt64,
        f_format_type_float16 => Float16,
        f_format_type_float32 => Float32,
        f_format_type_float64 => Float64,
        f_format_type_utf8 => Utf8,
        f_format_type_large_utf8 => LargeUtf8,
        f_format_type_binary => Binary,
        f_format_type_large_binary => LargeBinary,
        f_format_type_date32 => Date32,
        f_format_type_date64 => Date64,
    }

    #[test]
    fn f_inspector_new() {
        let inspector = create_test_inspector();
        assert!(!inspector.is_empty());
        assert_eq!(inspector.field_count(), 4);
        // `new` delegates to `with_width` with the default of 80.
        assert_eq!(inspector.display_width(), 80);
    }

    #[test]
    fn f_inspector_field_count() {
        let inspector = create_test_inspector();
        assert_eq!(inspector.field_count(), 4);
        assert_eq!(inspector.field_names().len(), 4);
        assert_eq!(inspector.type_names().len(), 4);
    }

    #[test]
    fn f_inspector_is_empty() {
        let adapter = DatasetAdapter::empty();
        let inspector = SchemaInspector::new(&adapter);
        assert!(inspector.is_empty());
        assert_eq!(inspector.field_count(), 0);
        assert!(inspector.field(0).is_none());

        assert!(!create_test_inspector().is_empty());
    }

    #[test]
    fn f_inspector_field_names() {
        let inspector = create_test_inspector();
        let names = inspector.field_names();
        assert_eq!(names, vec!["id", "value", "score", "timestamp"]);
    }

    #[test]
    fn f_inspector_type_names() {
        let inspector = create_test_inspector();
        let types = inspector.type_names();
        assert_eq!(types, vec!["Utf8", "Int32", "Float32", "Int64"]);
    }

    #[test]
    fn f_inspector_field_info() {
        let inspector = create_test_inspector();

        assert_eq!(inspector.field(0), Some(("id", "Utf8", false)));
        assert_eq!(inspector.field(2), Some(("score", "Float32", true)));
    }

    #[test]
    fn f_inspector_field_out_of_bounds() {
        let inspector = create_test_inspector();
        // First index past the end, and one far beyond it.
        assert!(inspector.field(4).is_none());
        assert!(inspector.field(100).is_none());
    }

    #[test]
    fn f_inspector_render_lines() {
        let inspector = create_test_inspector();
        let lines = inspector.render_lines();

        // Header + separator + 4 fields = 6 lines
        assert_eq!(lines.len(), 6);

        // Name column is 9 wide ("timestamp"), type column is 7 wide ("Float32").
        assert_eq!(
            lines[0],
            format!("{:<9}  {:<7}  Nullable", "Field", "Type")
        );
        assert_eq!(
            lines[1],
            format!("{}  {}  --------", "-".repeat(9), "-".repeat(7))
        );
        assert_eq!(lines[0].len(), lines[1].len());

        // Field lines
        assert!(lines[2].contains("id"));
        assert!(lines[2].contains("Utf8"));
        assert!(lines[5].contains("timestamp"));
        assert!(lines[5].contains("Int64"));
    }

    #[test]
    fn f_inspector_render_nullable() {
        let inspector = create_test_inspector();
        let lines = inspector.render_lines();

        // score is the only nullable column
        assert!(lines[4].ends_with("Yes"));
        // id, value and timestamp are not nullable
        assert!(lines[2].ends_with("No"));
        assert!(lines[3].ends_with("No"));
        assert!(lines[5].ends_with("No"));
    }

    #[test]
    fn f_inspector_clone() {
        let inspector = create_test_inspector();
        let cloned = inspector.clone();
        assert_eq!(inspector.field_count(), cloned.field_count());
        assert_eq!(inspector.display_width(), cloned.display_width());
        assert_eq!(inspector.field_names(), cloned.field_names());
        assert_eq!(inspector.render_lines(), cloned.render_lines());
    }

    #[test]
    fn f_inspector_display_width() {
        let adapter = create_test_adapter();
        assert_eq!(SchemaInspector::new(&adapter).display_width(), 80);
        assert_eq!(
            SchemaInspector::with_width(&adapter, 120).display_width(),
            120
        );
    }

    #[test]
    fn f_format_type_list() {
        let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
        assert_eq!(format_type_name(&list_type), "List<Int32>");
    }

    #[test]
    fn f_format_type_large_list() {
        let list_type = DataType::LargeList(Arc::new(Field::new("item", DataType::Utf8, true)));
        assert_eq!(format_type_name(&list_type), "LargeList<Utf8>");
    }

    #[test]
    fn f_format_type_timestamp() {
        let ts_type = DataType::Timestamp(TimeUnit::Millisecond, None);
        assert_eq!(format_type_name(&ts_type), "Timestamp[ms]");
    }

    #[test]
    fn f_format_type_timestamp_second() {
        let ts = DataType::Timestamp(TimeUnit::Second, None);
        assert_eq!(format_type_name(&ts), "Timestamp[s]");
    }

    #[test]
    fn f_format_type_timestamp_microsecond() {
        let ts = DataType::Timestamp(TimeUnit::Microsecond, None);
        assert_eq!(format_type_name(&ts), "Timestamp[us]");
    }

    #[test]
    fn f_format_type_timestamp_nanosecond() {
        let ts = DataType::Timestamp(TimeUnit::Nanosecond, None);
        assert_eq!(format_type_name(&ts), "Timestamp[ns]");
    }

    #[test]
    fn f_format_type_timestamp_with_tz() {
        let ts = DataType::Timestamp(TimeUnit::Millisecond, Some(Arc::from("UTC")));
        assert_eq!(format_type_name(&ts), "Timestamp[ms, UTC]");
    }

    #[test]
    fn f_format_type_struct() {
        let fields = vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Utf8, true),
        ];
        let struct_type = DataType::Struct(fields.into());
        // Only the member count is shown, not the member types.
        assert_eq!(format_type_name(&struct_type), "Struct(2)");
    }

    #[test]
    fn f_format_type_dictionary() {
        let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
        assert_eq!(format_type_name(&dict_type), "Dict<Int32, Utf8>");
    }

    #[test]
    fn f_format_type_map() {
        let map_type = DataType::Map(
            Arc::new(Field::new(
                "entries",
                DataType::Struct(
                    vec![
                        Field::new("key", DataType::Utf8, false),
                        Field::new("value", DataType::Int32, true),
                    ]
                    .into(),
                ),
                false,
            )),
            false,
        );
        // The label is built from the entries field, which is a two-member struct.
        assert_eq!(format_type_name(&map_type), "Map<Struct(2)>");
    }

    #[test]
    fn f_format_type_fallback() {
        // Duration has no dedicated arm, so it goes through the Debug fallback.
        let duration_type = DataType::Duration(TimeUnit::Second);
        let formatted = format_type_name(&duration_type);
        // Exact Debug text belongs to the arrow crate; only pin the variant name.
        assert!(formatted.contains("Duration"));
    }
}