1use super::{adapter::DatasetAdapter, format::truncate_string};
6
7#[derive(Debug, Clone)]
22pub struct SchemaInspector {
23 fields: Vec<FieldInfo>,
25 display_width: u16,
27}
28
/// Display-ready description of a single schema field.
#[derive(Debug, Clone)]
struct FieldInfo {
    /// Name of the field.
    name: String,
    /// Human-readable label for the field's data type.
    type_name: String,
    /// `true` when the field is allowed to hold nulls.
    nullable: bool,
}
36
37impl SchemaInspector {
38 pub fn new(adapter: &DatasetAdapter) -> Self {
40 Self::with_width(adapter, 80)
41 }
42
43 pub fn with_width(adapter: &DatasetAdapter, width: u16) -> Self {
45 let schema = adapter.schema();
46 let fields: Vec<FieldInfo> = schema
47 .fields()
48 .iter()
49 .map(|f| FieldInfo {
50 name: f.name().clone(),
51 type_name: format_type_name(f.data_type()),
52 nullable: f.is_nullable(),
53 })
54 .collect();
55
56 Self {
57 fields,
58 display_width: width,
59 }
60 }
61
62 pub fn field_count(&self) -> usize {
64 self.fields.len()
65 }
66
67 pub fn is_empty(&self) -> bool {
69 self.fields.is_empty()
70 }
71
72 pub fn display_width(&self) -> u16 {
74 self.display_width
75 }
76
77 pub fn render_lines(&self) -> Vec<String> {
79 let mut lines = Vec::with_capacity(self.fields.len() + 3);
80
81 let name_width = self
83 .fields
84 .iter()
85 .map(|f| f.name.len())
86 .max()
87 .unwrap_or(5)
88 .max(5);
89 let type_width = self
90 .fields
91 .iter()
92 .map(|f| f.type_name.len())
93 .max()
94 .unwrap_or(4)
95 .max(4);
96
97 let header = format!(
99 "{:<name_width$} {:<type_width$} Nullable",
100 "Field",
101 "Type",
102 name_width = name_width,
103 type_width = type_width
104 );
105 lines.push(header);
106
107 let sep = format!(
109 "{:-<name_width$} {:-<type_width$} --------",
110 "",
111 "",
112 name_width = name_width,
113 type_width = type_width
114 );
115 lines.push(sep);
116
117 for field in &self.fields {
119 let nullable_str = if field.nullable { "Yes" } else { "No" };
120 let line = format!(
121 "{:<name_width$} {:<type_width$} {}",
122 truncate_string(&field.name, name_width),
123 truncate_string(&field.type_name, type_width),
124 nullable_str,
125 name_width = name_width,
126 type_width = type_width
127 );
128 lines.push(line);
129 }
130
131 lines
132 }
133
134 pub fn field(&self, index: usize) -> Option<(&str, &str, bool)> {
136 self.fields
137 .get(index)
138 .map(|f| (f.name.as_str(), f.type_name.as_str(), f.nullable))
139 }
140
141 pub fn field_names(&self) -> Vec<&str> {
143 self.fields.iter().map(|f| f.name.as_str()).collect()
144 }
145
146 pub fn type_names(&self) -> Vec<&str> {
148 self.fields.iter().map(|f| f.type_name.as_str()).collect()
149 }
150}
151
152fn format_type_name(dt: &arrow::datatypes::DataType) -> String {
154 use arrow::datatypes::DataType;
155
156 match dt {
157 DataType::Null => "Null".to_string(),
158 DataType::Boolean => "Boolean".to_string(),
159 DataType::Int8 => "Int8".to_string(),
160 DataType::Int16 => "Int16".to_string(),
161 DataType::Int32 => "Int32".to_string(),
162 DataType::Int64 => "Int64".to_string(),
163 DataType::UInt8 => "UInt8".to_string(),
164 DataType::UInt16 => "UInt16".to_string(),
165 DataType::UInt32 => "UInt32".to_string(),
166 DataType::UInt64 => "UInt64".to_string(),
167 DataType::Float16 => "Float16".to_string(),
168 DataType::Float32 => "Float32".to_string(),
169 DataType::Float64 => "Float64".to_string(),
170 DataType::Utf8 => "Utf8".to_string(),
171 DataType::LargeUtf8 => "LargeUtf8".to_string(),
172 DataType::Binary => "Binary".to_string(),
173 DataType::LargeBinary => "LargeBinary".to_string(),
174 DataType::Date32 => "Date32".to_string(),
175 DataType::Date64 => "Date64".to_string(),
176 DataType::Timestamp(unit, tz) => {
177 let unit_str = match unit {
178 arrow::datatypes::TimeUnit::Second => "s",
179 arrow::datatypes::TimeUnit::Millisecond => "ms",
180 arrow::datatypes::TimeUnit::Microsecond => "us",
181 arrow::datatypes::TimeUnit::Nanosecond => "ns",
182 };
183 match tz {
184 Some(tz) => format!("Timestamp[{unit_str}, {tz}]"),
185 None => format!("Timestamp[{unit_str}]"),
186 }
187 }
188 DataType::List(inner) => format!("List<{}>", format_type_name(inner.data_type())),
189 DataType::LargeList(inner) => {
190 format!("LargeList<{}>", format_type_name(inner.data_type()))
191 }
192 DataType::Struct(fields) => {
193 format!("Struct({})", fields.len())
194 }
195 DataType::Dictionary(key, value) => {
196 format!(
197 "Dict<{}, {}>",
198 format_type_name(key),
199 format_type_name(value)
200 )
201 }
202 DataType::Map(field, _) => {
203 format!("Map<{}>", format_type_name(field.data_type()))
204 }
205 _ => format!("{dt:?}"),
206 }
207}
208
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::datatypes::{DataType, Field, Schema, TimeUnit};

    use super::*;

    /// Adapter with no batches and four columns: `id` (Utf8), `value`
    /// (Int32), `score` (nullable Float32) and `timestamp` (Int64).
    fn create_test_adapter() -> DatasetAdapter {
        let columns = vec![
            Field::new("id", DataType::Utf8, false),
            Field::new("value", DataType::Int32, false),
            Field::new("score", DataType::Float32, true),
            Field::new("timestamp", DataType::Int64, false),
        ];

        DatasetAdapter::from_batches(vec![], Arc::new(Schema::new(columns))).unwrap()
    }

    /// Default-width inspector over the four-column test adapter.
    fn sample_inspector() -> SchemaInspector {
        SchemaInspector::new(&create_test_adapter())
    }

    /// Asserts that `dt` is labelled exactly `expected`.
    #[track_caller]
    fn expect_label(dt: DataType, expected: &str) {
        assert_eq!(format_type_name(&dt), expected);
    }

    /// Label of a timestamp type with the given unit and optional time zone.
    fn timestamp_label(unit: TimeUnit, tz: Option<&str>) -> String {
        format_type_name(&DataType::Timestamp(unit, tz.map(Arc::<str>::from)))
    }

    #[test]
    fn f_inspector_new() {
        assert_eq!(sample_inspector().field_count(), 4);
    }

    #[test]
    fn f_inspector_field_count() {
        assert_eq!(sample_inspector().field_count(), 4);
    }

    #[test]
    fn f_inspector_is_empty() {
        let inspector = SchemaInspector::new(&DatasetAdapter::empty());
        assert!(inspector.is_empty());
    }

    #[test]
    fn f_inspector_field_names() {
        let inspector = sample_inspector();
        assert_eq!(
            inspector.field_names(),
            ["id", "value", "score", "timestamp"]
        );
    }

    #[test]
    fn f_inspector_type_names() {
        let inspector = sample_inspector();
        let labels = inspector.type_names();
        assert_eq!(labels[0], "Utf8");
        assert_eq!(labels[1], "Int32");
        assert_eq!(labels[2], "Float32");
    }

    #[test]
    fn f_inspector_field_info() {
        let inspector = sample_inspector();

        assert_eq!(inspector.field(0), Some(("id", "Utf8", false)));

        // The type label of the third field is deliberately not checked.
        assert!(matches!(inspector.field(2), Some(("score", _, true))));
    }

    #[test]
    fn f_inspector_field_out_of_bounds() {
        assert!(sample_inspector().field(100).is_none());
    }

    #[test]
    fn f_inspector_render_lines() {
        let rendered = sample_inspector().render_lines();

        // Header + separator + one row per field.
        assert_eq!(rendered.len(), 6);

        for label in ["Field", "Type"] {
            assert!(rendered[0].contains(label));
        }

        assert!(rendered[1].contains("---"));

        for cell in ["id", "Utf8"] {
            assert!(rendered[2].contains(cell));
        }
    }

    #[test]
    fn f_inspector_render_nullable() {
        let rendered = sample_inspector().render_lines();

        // Row 4 is `score` (nullable); row 2 is `id` (not nullable).
        assert!(rendered[4].contains("Yes"));
        assert!(rendered[2].contains("No"));
    }

    #[test]
    fn f_inspector_clone() {
        let original = sample_inspector();
        let copy = original.clone();
        assert_eq!(original.field_count(), copy.field_count());
    }

    #[test]
    fn f_format_type_utf8() {
        expect_label(DataType::Utf8, "Utf8");
    }

    #[test]
    fn f_format_type_int32() {
        expect_label(DataType::Int32, "Int32");
    }

    #[test]
    fn f_format_type_float32() {
        expect_label(DataType::Float32, "Float32");
    }

    #[test]
    fn f_format_type_list() {
        let element = Field::new("item", DataType::Int32, true);
        let label = format_type_name(&DataType::List(Arc::new(element)));
        assert!(label.contains("List"));
        assert!(label.contains("Int32"));
    }

    #[test]
    fn f_format_type_timestamp() {
        let label = timestamp_label(TimeUnit::Millisecond, None);
        assert!(label.contains("Timestamp"));
        assert!(label.contains("ms"));
    }

    #[test]
    fn f_inspector_display_width() {
        assert!(sample_inspector().display_width() > 0);
    }

    #[test]
    fn f_format_type_null() {
        expect_label(DataType::Null, "Null");
    }

    #[test]
    fn f_format_type_boolean() {
        expect_label(DataType::Boolean, "Boolean");
    }

    #[test]
    fn f_format_type_int8() {
        expect_label(DataType::Int8, "Int8");
    }

    #[test]
    fn f_format_type_int16() {
        expect_label(DataType::Int16, "Int16");
    }

    #[test]
    fn f_format_type_int64() {
        expect_label(DataType::Int64, "Int64");
    }

    #[test]
    fn f_format_type_uint8() {
        expect_label(DataType::UInt8, "UInt8");
    }

    #[test]
    fn f_format_type_uint16() {
        expect_label(DataType::UInt16, "UInt16");
    }

    #[test]
    fn f_format_type_uint32() {
        expect_label(DataType::UInt32, "UInt32");
    }

    #[test]
    fn f_format_type_uint64() {
        expect_label(DataType::UInt64, "UInt64");
    }

    #[test]
    fn f_format_type_float16() {
        expect_label(DataType::Float16, "Float16");
    }

    #[test]
    fn f_format_type_float64() {
        expect_label(DataType::Float64, "Float64");
    }

    #[test]
    fn f_format_type_large_utf8() {
        expect_label(DataType::LargeUtf8, "LargeUtf8");
    }

    #[test]
    fn f_format_type_binary() {
        expect_label(DataType::Binary, "Binary");
    }

    #[test]
    fn f_format_type_large_binary() {
        expect_label(DataType::LargeBinary, "LargeBinary");
    }

    #[test]
    fn f_format_type_date32() {
        expect_label(DataType::Date32, "Date32");
    }

    #[test]
    fn f_format_type_date64() {
        expect_label(DataType::Date64, "Date64");
    }

    #[test]
    fn f_format_type_timestamp_second() {
        assert!(timestamp_label(TimeUnit::Second, None).contains("[s]"));
    }

    #[test]
    fn f_format_type_timestamp_microsecond() {
        assert!(timestamp_label(TimeUnit::Microsecond, None).contains("[us]"));
    }

    #[test]
    fn f_format_type_timestamp_nanosecond() {
        assert!(timestamp_label(TimeUnit::Nanosecond, None).contains("[ns]"));
    }

    #[test]
    fn f_format_type_timestamp_with_tz() {
        assert!(timestamp_label(TimeUnit::Millisecond, Some("UTC")).contains("UTC"));
    }

    #[test]
    fn f_format_type_large_list() {
        let element = Field::new("item", DataType::Utf8, true);
        let label = format_type_name(&DataType::LargeList(Arc::new(element)));
        assert!(label.contains("LargeList"));
        assert!(label.contains("Utf8"));
    }

    #[test]
    fn f_format_type_struct() {
        let children = vec![
            Field::new("a", DataType::Int32, false),
            Field::new("b", DataType::Utf8, true),
        ];
        let label = format_type_name(&DataType::Struct(children.into()));
        assert!(label.contains("Struct"));
        // The label carries the number of child fields.
        assert!(label.contains('2'));
    }

    #[test]
    fn f_format_type_dictionary() {
        let dictionary =
            DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
        let label = format_type_name(&dictionary);
        for part in ["Dict", "Int32", "Utf8"] {
            assert!(label.contains(part));
        }
    }

    #[test]
    fn f_format_type_map() {
        let entry_struct = DataType::Struct(
            vec![
                Field::new("key", DataType::Utf8, false),
                Field::new("value", DataType::Int32, true),
            ]
            .into(),
        );
        let entries = Field::new("entries", entry_struct, false);
        let label = format_type_name(&DataType::Map(Arc::new(entries), false));
        assert!(label.contains("Map"));
    }

    #[test]
    fn f_format_type_fallback() {
        // `Duration` has no dedicated arm, so it exercises the Debug fallback.
        let label = format_type_name(&DataType::Duration(TimeUnit::Second));
        assert!(label.contains("Duration"));
    }
}