Skip to main content

lance_namespace/
schema.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Schema conversion utilities for Lance Namespace.
5//!
6//! This module provides functions to convert between JsonArrow schema representations
7//! and Arrow schema types.
8
9use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
10use lance_core::{Error, Result};
11use lance_namespace_reqwest_client::models::{JsonArrowDataType, JsonArrowField, JsonArrowSchema};
12
13/// Convert Arrow Schema to JsonArrowSchema
14pub fn arrow_schema_to_json(arrow_schema: &ArrowSchema) -> Result<JsonArrowSchema> {
15    let fields: Result<Vec<JsonArrowField>> = arrow_schema
16        .fields()
17        .iter()
18        .map(|f| arrow_field_to_json(f.as_ref()))
19        .collect();
20
21    let metadata = if arrow_schema.metadata().is_empty() {
22        None
23    } else {
24        Some(arrow_schema.metadata().clone())
25    };
26
27    Ok(JsonArrowSchema {
28        fields: fields?,
29        metadata,
30    })
31}
32
33/// Convert Arrow Field to JsonArrowField
34fn arrow_field_to_json(arrow_field: &Field) -> Result<JsonArrowField> {
35    let data_type = arrow_type_to_json(arrow_field.data_type())?;
36
37    Ok(JsonArrowField {
38        name: arrow_field.name().clone(),
39        nullable: arrow_field.is_nullable(),
40        r#type: Box::new(data_type),
41        metadata: if arrow_field.metadata().is_empty() {
42            None
43        } else {
44            Some(arrow_field.metadata().clone())
45        },
46    })
47}
48
49/// Convert Arrow DataType to JsonArrowDataType
50fn arrow_type_to_json(data_type: &DataType) -> Result<JsonArrowDataType> {
51    match data_type {
52        // Primitive types
53        DataType::Null => Ok(JsonArrowDataType::new("null".to_string())),
54        DataType::Boolean => Ok(JsonArrowDataType::new("bool".to_string())),
55        DataType::Int8 => Ok(JsonArrowDataType::new("int8".to_string())),
56        DataType::UInt8 => Ok(JsonArrowDataType::new("uint8".to_string())),
57        DataType::Int16 => Ok(JsonArrowDataType::new("int16".to_string())),
58        DataType::UInt16 => Ok(JsonArrowDataType::new("uint16".to_string())),
59        DataType::Int32 => Ok(JsonArrowDataType::new("int32".to_string())),
60        DataType::UInt32 => Ok(JsonArrowDataType::new("uint32".to_string())),
61        DataType::Int64 => Ok(JsonArrowDataType::new("int64".to_string())),
62        DataType::UInt64 => Ok(JsonArrowDataType::new("uint64".to_string())),
63        DataType::Float16 => Ok(JsonArrowDataType::new("float16".to_string())),
64        DataType::Float32 => Ok(JsonArrowDataType::new("float32".to_string())),
65        DataType::Float64 => Ok(JsonArrowDataType::new("float64".to_string())),
66        DataType::Decimal32(precision, scale) => {
67            let mut dt = JsonArrowDataType::new("decimal32".to_string());
68            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
69            Ok(dt)
70        }
71        DataType::Decimal64(precision, scale) => {
72            let mut dt = JsonArrowDataType::new("decimal64".to_string());
73            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
74            Ok(dt)
75        }
76        DataType::Decimal128(precision, scale) => {
77            let mut dt = JsonArrowDataType::new("decimal128".to_string());
78            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
79            Ok(dt)
80        }
81        DataType::Decimal256(precision, scale) => {
82            let mut dt = JsonArrowDataType::new("decimal256".to_string());
83            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
84            Ok(dt)
85        }
86        DataType::Date32 => Ok(JsonArrowDataType::new("date32".to_string())),
87        DataType::Date64 => Ok(JsonArrowDataType::new("date64".to_string())),
88        DataType::Time32(_) => Ok(JsonArrowDataType::new("time32".to_string())),
89        DataType::Time64(_) => Ok(JsonArrowDataType::new("time64".to_string())),
90        DataType::Timestamp(_, _tz) => {
91            // TODO: We could encode timezone info if needed
92            Ok(JsonArrowDataType::new("timestamp".to_string()))
93        }
94        DataType::Duration(_) => Ok(JsonArrowDataType::new("duration".to_string())),
95        DataType::Interval(_) => Ok(JsonArrowDataType::new("interval".to_string())),
96
97        // String and Binary types
98        DataType::Utf8 => Ok(JsonArrowDataType::new("utf8".to_string())),
99        DataType::LargeUtf8 => Ok(JsonArrowDataType::new("large_utf8".to_string())),
100        DataType::Binary => Ok(JsonArrowDataType::new("binary".to_string())),
101        DataType::LargeBinary => Ok(JsonArrowDataType::new("large_binary".to_string())),
102        DataType::FixedSizeBinary(size) => {
103            let mut dt = JsonArrowDataType::new("fixed_size_binary".to_string());
104            dt.length = Some(*size as i64);
105            Ok(dt)
106        }
107
108        // Nested types
109        DataType::List(field) => {
110            let inner_type = arrow_type_to_json(field.data_type())?;
111            let inner_field = JsonArrowField {
112                name: field.name().clone(),
113                nullable: field.is_nullable(),
114                r#type: Box::new(inner_type),
115                metadata: if field.metadata().is_empty() {
116                    None
117                } else {
118                    Some(field.metadata().clone())
119                },
120            };
121            Ok(JsonArrowDataType {
122                r#type: "list".to_string(),
123                fields: Some(vec![inner_field]),
124                length: None,
125            })
126        }
127        DataType::LargeList(field) => {
128            let inner_type = arrow_type_to_json(field.data_type())?;
129            let inner_field = JsonArrowField {
130                name: field.name().clone(),
131                nullable: field.is_nullable(),
132                r#type: Box::new(inner_type),
133                metadata: if field.metadata().is_empty() {
134                    None
135                } else {
136                    Some(field.metadata().clone())
137                },
138            };
139            Ok(JsonArrowDataType {
140                r#type: "large_list".to_string(),
141                fields: Some(vec![inner_field]),
142                length: None,
143            })
144        }
145        DataType::FixedSizeList(field, size) => {
146            let inner_type = arrow_type_to_json(field.data_type())?;
147            let inner_field = JsonArrowField {
148                name: field.name().clone(),
149                nullable: field.is_nullable(),
150                r#type: Box::new(inner_type),
151                metadata: if field.metadata().is_empty() {
152                    None
153                } else {
154                    Some(field.metadata().clone())
155                },
156            };
157            Ok(JsonArrowDataType {
158                r#type: "fixed_size_list".to_string(),
159                fields: Some(vec![inner_field]),
160                length: Some(*size as i64),
161            })
162        }
163        DataType::Struct(fields) => {
164            let json_fields: Result<Vec<JsonArrowField>> = fields
165                .iter()
166                .map(|f| arrow_field_to_json(f.as_ref()))
167                .collect();
168            Ok(JsonArrowDataType {
169                r#type: "struct".to_string(),
170                fields: Some(json_fields?),
171                length: None,
172            })
173        }
174        DataType::Union(_, _) => {
175            // Union types are complex, for now we'll skip detailed conversion
176            Ok(JsonArrowDataType::new("union".to_string()))
177        }
178        DataType::Dictionary(_, value_type) => {
179            // For dictionary, return the value type
180            arrow_type_to_json(value_type)
181        }
182
183        DataType::Map(entries_field, keys_sorted) => {
184            if *keys_sorted {
185                return Err(Error::namespace(format!(
186                    "Map types with keys_sorted=true are not yet supported for JSON conversion: {:?}",
187                    data_type
188                )));
189            }
190            let inner_type = arrow_type_to_json(entries_field.data_type())?;
191            let inner_field = JsonArrowField {
192                name: entries_field.name().clone(),
193                nullable: entries_field.is_nullable(),
194                r#type: Box::new(inner_type),
195                metadata: if entries_field.metadata().is_empty() {
196                    None
197                } else {
198                    Some(entries_field.metadata().clone())
199                },
200            };
201            Ok(JsonArrowDataType {
202                r#type: "map".to_string(),
203                fields: Some(vec![inner_field]),
204                length: None,
205            })
206        }
207
208        // Unsupported types
209        DataType::RunEndEncoded(_, _) => Err(Error::namespace(format!(
210            "RunEndEncoded type is not yet supported for JSON conversion: {:?}",
211            data_type
212        ))),
213        DataType::ListView(_) | DataType::LargeListView(_) => Err(Error::namespace(format!(
214            "ListView types are not yet supported for JSON conversion: {:?}",
215            data_type
216        ))),
217        DataType::Utf8View | DataType::BinaryView => Err(Error::namespace(format!(
218            "View types are not yet supported for JSON conversion: {:?}",
219            data_type
220        ))),
221    }
222}
223
224/// Convert JsonArrowSchema to Arrow Schema
225pub fn convert_json_arrow_schema(json_schema: &JsonArrowSchema) -> Result<ArrowSchema> {
226    let fields: Result<Vec<Field>> = json_schema
227        .fields
228        .iter()
229        .map(convert_json_arrow_field)
230        .collect();
231
232    let metadata = json_schema.metadata.as_ref().cloned().unwrap_or_default();
233
234    Ok(ArrowSchema::new_with_metadata(fields?, metadata))
235}
236
237/// Convert JsonArrowField to Arrow Field
238pub fn convert_json_arrow_field(json_field: &JsonArrowField) -> Result<Field> {
239    let data_type = convert_json_arrow_type(&json_field.r#type)?;
240    let nullable = json_field.nullable;
241
242    let field = Field::new(&json_field.name, data_type, nullable);
243    Ok(match json_field.metadata.as_ref() {
244        Some(metadata) => field.with_metadata(metadata.clone()),
245        None => field,
246    })
247}
248
249/// Convert JsonArrowDataType to Arrow DataType
250pub fn convert_json_arrow_type(json_type: &JsonArrowDataType) -> Result<DataType> {
251    use std::sync::Arc;
252
253    let type_name = json_type.r#type.to_lowercase();
254
255    match type_name.as_str() {
256        // Primitive types
257        "null" => Ok(DataType::Null),
258        "bool" | "boolean" => Ok(DataType::Boolean),
259        "int8" => Ok(DataType::Int8),
260        "uint8" => Ok(DataType::UInt8),
261        "int16" => Ok(DataType::Int16),
262        "uint16" => Ok(DataType::UInt16),
263        "int32" => Ok(DataType::Int32),
264        "uint32" => Ok(DataType::UInt32),
265        "int64" => Ok(DataType::Int64),
266        "uint64" => Ok(DataType::UInt64),
267        "float16" => Ok(DataType::Float16),
268        "float32" => Ok(DataType::Float32),
269        "float64" => Ok(DataType::Float64),
270
271        // Decimal types - encoding: precision * 1000 + scale
272        // Decoding must handle negative scale: precision = ((encoded + 128) / 1000)
273        "decimal32" => {
274            let encoded = json_type.length.unwrap_or(0);
275            let precision = ((encoded + 128) / 1000) as u8;
276            let scale = (encoded - precision as i64 * 1000) as i8;
277            Ok(DataType::Decimal32(precision, scale))
278        }
279        "decimal64" => {
280            let encoded = json_type.length.unwrap_or(0);
281            let precision = ((encoded + 128) / 1000) as u8;
282            let scale = (encoded - precision as i64 * 1000) as i8;
283            Ok(DataType::Decimal64(precision, scale))
284        }
285        "decimal128" => {
286            let encoded = json_type.length.unwrap_or(0);
287            let precision = ((encoded + 128) / 1000) as u8;
288            let scale = (encoded - precision as i64 * 1000) as i8;
289            Ok(DataType::Decimal128(precision, scale))
290        }
291        "decimal256" => {
292            let encoded = json_type.length.unwrap_or(0);
293            let precision = ((encoded + 128) / 1000) as u8;
294            let scale = (encoded - precision as i64 * 1000) as i8;
295            Ok(DataType::Decimal256(precision, scale))
296        }
297
298        // Date/Time types
299        "date32" => Ok(DataType::Date32),
300        "date64" => Ok(DataType::Date64),
301        "timestamp" => Ok(DataType::Timestamp(
302            arrow::datatypes::TimeUnit::Microsecond,
303            None,
304        )),
305        "duration" => Ok(DataType::Duration(arrow::datatypes::TimeUnit::Microsecond)),
306
307        // String and Binary types
308        "utf8" => Ok(DataType::Utf8),
309        "large_utf8" => Ok(DataType::LargeUtf8),
310        "binary" => Ok(DataType::Binary),
311        "large_binary" => Ok(DataType::LargeBinary),
312        "fixed_size_binary" => {
313            let size = json_type.length.unwrap_or(0) as i32;
314            Ok(DataType::FixedSizeBinary(size))
315        }
316
317        // Nested types
318        "list" => {
319            let inner = json_type
320                .fields
321                .as_ref()
322                .and_then(|f| f.first())
323                .ok_or_else(|| Error::namespace("list type missing inner field"))?;
324            Ok(DataType::List(Arc::new(convert_json_arrow_field(inner)?)))
325        }
326        "large_list" => {
327            let inner = json_type
328                .fields
329                .as_ref()
330                .and_then(|f| f.first())
331                .ok_or_else(|| Error::namespace("large_list type missing inner field"))?;
332            Ok(DataType::LargeList(Arc::new(convert_json_arrow_field(
333                inner,
334            )?)))
335        }
336        "fixed_size_list" => {
337            let inner = json_type
338                .fields
339                .as_ref()
340                .and_then(|f| f.first())
341                .ok_or_else(|| Error::namespace("fixed_size_list type missing inner field"))?;
342            let size = json_type.length.unwrap_or(0) as i32;
343            Ok(DataType::FixedSizeList(
344                Arc::new(convert_json_arrow_field(inner)?),
345                size,
346            ))
347        }
348        "struct" => {
349            let fields = json_type
350                .fields
351                .as_ref()
352                .ok_or_else(|| Error::namespace("struct type missing fields"))?;
353            let arrow_fields: Result<Vec<Field>> =
354                fields.iter().map(convert_json_arrow_field).collect();
355            Ok(DataType::Struct(arrow_fields?.into()))
356        }
357        "map" => {
358            let entries = json_type
359                .fields
360                .as_ref()
361                .and_then(|f| f.first())
362                .ok_or_else(|| Error::namespace("map type missing entries field"))?;
363            Ok(DataType::Map(
364                Arc::new(convert_json_arrow_field(entries)?),
365                false,
366            ))
367        }
368
369        _ => Err(Error::namespace(format!(
370            "Unsupported Arrow type: {}",
371            type_name
372        ))),
373    }
374}
375
// Unit tests for the Arrow <-> JsonArrow schema conversions defined above.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Field-level extension metadata (`ARROW:extension:name`) must be present
    /// in the JSON form and survive the Arrow -> JSON -> Arrow round trip.
    #[test]
    fn test_extension_metadata_preserved_in_json_roundtrip() {
        const ARROW_EXT_NAME_KEY: &str = "ARROW:extension:name";
        const LANCE_JSON_EXT_NAME: &str = "lance.json";

        let meta_field =
            Field::new("meta", DataType::Binary, true).with_metadata(HashMap::from([(
                ARROW_EXT_NAME_KEY.to_string(),
                LANCE_JSON_EXT_NAME.to_string(),
            )]));
        let arrow_schema =
            ArrowSchema::new(vec![Field::new("id", DataType::Int32, false), meta_field]);

        let json_schema = arrow_schema_to_json(&arrow_schema).unwrap();
        let meta_json_field = json_schema
            .fields
            .iter()
            .find(|f| f.name == "meta")
            .unwrap();
        assert!(
            meta_json_field
                .metadata
                .as_ref()
                .unwrap()
                .contains_key(ARROW_EXT_NAME_KEY)
        );

        let roundtrip = convert_json_arrow_schema(&json_schema).unwrap();
        let meta_field = roundtrip.field_with_name("meta").unwrap();
        assert_eq!(
            meta_field.metadata().get(ARROW_EXT_NAME_KEY),
            Some(&LANCE_JSON_EXT_NAME.to_string())
        );
    }

    /// JSON -> Arrow conversion of a few name-only scalar types.
    #[test]
    fn test_convert_basic_types() {
        // Test int32
        let int_type = JsonArrowDataType::new("int32".to_string());
        let result = convert_json_arrow_type(&int_type).unwrap();
        assert_eq!(result, DataType::Int32);

        // Test utf8
        let string_type = JsonArrowDataType::new("utf8".to_string());
        let result = convert_json_arrow_type(&string_type).unwrap();
        assert_eq!(result, DataType::Utf8);

        // Test float64
        let float_type = JsonArrowDataType::new("float64".to_string());
        let result = convert_json_arrow_type(&float_type).unwrap();
        assert_eq!(result, DataType::Float64);

        // Test binary
        let binary_type = JsonArrowDataType::new("binary".to_string());
        let result = convert_json_arrow_type(&binary_type).unwrap();
        assert_eq!(result, DataType::Binary);
    }

    /// JSON -> Arrow conversion of a single field keeps name, type and
    /// nullability.
    #[test]
    fn test_convert_field() {
        let int_type = JsonArrowDataType::new("int32".to_string());
        let field = JsonArrowField {
            name: "test_field".to_string(),
            r#type: Box::new(int_type),
            nullable: false,
            metadata: None,
        };

        let result = convert_json_arrow_field(&field).unwrap();
        assert_eq!(result.name(), "test_field");
        assert_eq!(result.data_type(), &DataType::Int32);
        assert!(!result.is_nullable());
    }

    /// JSON -> Arrow conversion of a two-field schema keeps field order and
    /// schema-level metadata.
    #[test]
    fn test_convert_schema() {
        let int_type = JsonArrowDataType::new("int32".to_string());
        let string_type = JsonArrowDataType::new("utf8".to_string());

        let id_field = JsonArrowField {
            name: "id".to_string(),
            r#type: Box::new(int_type),
            nullable: false,
            metadata: None,
        };

        let name_field = JsonArrowField {
            name: "name".to_string(),
            r#type: Box::new(string_type),
            nullable: true,
            metadata: None,
        };

        let mut metadata = HashMap::new();
        metadata.insert("key".to_string(), "value".to_string());

        let schema = JsonArrowSchema {
            fields: vec![id_field, name_field],
            metadata: Some(metadata.clone()),
        };

        let result = convert_json_arrow_schema(&schema).unwrap();
        assert_eq!(result.fields().len(), 2);
        assert_eq!(result.field(0).name(), "id");
        assert_eq!(result.field(1).name(), "name");
        assert_eq!(result.metadata(), &metadata);
    }

    /// An unknown JSON type name is rejected with an "Unsupported Arrow type"
    /// error.
    #[test]
    fn test_unsupported_type() {
        let unsupported_type = JsonArrowDataType::new("unsupported".to_string());
        let result = convert_json_arrow_type(&unsupported_type);
        assert!(result.is_err());
        assert!(
            result
                .unwrap_err()
                .to_string()
                .contains("Unsupported Arrow type")
        );
    }

    /// Arrow -> JSON conversion of `List` emits one child field with the
    /// inner field's name and type.
    #[test]
    fn test_list_type() {
        use arrow::datatypes::Field;

        let inner_field = Field::new("item", DataType::Int32, true);
        let list_type = DataType::List(Arc::new(inner_field));

        let result = arrow_type_to_json(&list_type).unwrap();
        assert_eq!(result.r#type, "list");
        assert!(result.fields.is_some());
        let fields = result.fields.unwrap();
        assert_eq!(fields.len(), 1);
        assert_eq!(fields[0].name, "item");
        assert_eq!(fields[0].r#type.r#type, "int32");
    }

    /// Arrow -> JSON conversion of `Struct` emits one child per struct field,
    /// in order.
    #[test]
    fn test_struct_type() {
        use arrow::datatypes::Field;

        let fields = vec![
            Field::new("id", DataType::Int64, false),
            Field::new("name", DataType::Utf8, true),
        ];
        let struct_type = DataType::Struct(fields.into());

        let result = arrow_type_to_json(&struct_type).unwrap();
        assert_eq!(result.r#type, "struct");
        assert!(result.fields.is_some());
        let json_fields = result.fields.unwrap();
        assert_eq!(json_fields.len(), 2);
        assert_eq!(json_fields[0].name, "id");
        assert_eq!(json_fields[0].r#type.r#type, "int64");
        assert_eq!(json_fields[1].name, "name");
        assert_eq!(json_fields[1].r#type.r#type, "utf8");
    }

    /// Arrow -> JSON conversion of `FixedSizeList` stores the list size in
    /// `length` and the element type in the single child field.
    #[test]
    fn test_fixed_size_list_type() {
        use arrow::datatypes::Field;

        let inner_field = Field::new("item", DataType::Float32, false);
        let fixed_list_type = DataType::FixedSizeList(Arc::new(inner_field), 3);

        let result = arrow_type_to_json(&fixed_list_type).unwrap();
        assert_eq!(result.r#type, "fixed_size_list");
        assert_eq!(result.length, Some(3));
        assert!(result.fields.is_some());
        let fields = result.fields.unwrap();
        assert_eq!(fields.len(), 1);
        assert_eq!(fields[0].r#type.r#type, "float32");
    }

    /// Nested conversion: a struct containing a list keeps the list's child
    /// structure.
    #[test]
    fn test_nested_struct_with_list() {
        use arrow::datatypes::Field;

        let inner_list_field = Field::new("item", DataType::Utf8, true);
        let list_type = DataType::List(Arc::new(inner_list_field));

        let struct_fields = vec![
            Field::new("id", DataType::Int32, false),
            Field::new("tags", list_type, true),
        ];
        let struct_type = DataType::Struct(struct_fields.into());

        let result = arrow_type_to_json(&struct_type).unwrap();
        assert_eq!(result.r#type, "struct");
        let json_fields = result.fields.unwrap();
        assert_eq!(json_fields.len(), 2);
        assert_eq!(json_fields[0].name, "id");
        assert_eq!(json_fields[1].name, "tags");
        assert_eq!(json_fields[1].r#type.r#type, "list");

        // Check nested list structure
        let list_fields = json_fields[1].r#type.fields.as_ref().unwrap();
        assert_eq!(list_fields.len(), 1);
        assert_eq!(list_fields[0].r#type.r#type, "utf8");
    }

    /// Arrow -> JSON conversion of an unsorted `Map` emits a single
    /// `entries` child whose type is a struct.
    #[test]
    fn test_map_type_supported() {
        use arrow::datatypes::Field;

        let key_field = Field::new("keys", DataType::Utf8, false);
        let value_field = Field::new("values", DataType::Int32, true);
        let map_type = DataType::Map(
            Arc::new(Field::new(
                "entries",
                DataType::Struct(vec![key_field, value_field].into()),
                false,
            )),
            false,
        );

        let result = arrow_type_to_json(&map_type);
        assert!(result.is_ok());
        let json_type = result.unwrap();
        assert_eq!(json_type.r#type, "map");
        assert!(json_type.fields.is_some());

        let fields = json_type.fields.unwrap();
        assert_eq!(fields.len(), 1);
        assert_eq!(fields[0].name, "entries");
        assert_eq!(fields[0].r#type.r#type, "struct");
    }

    /// Arrow -> JSON type names for date, fixed-size binary and float16.
    #[test]
    fn test_additional_types() {
        // Test Date types
        let date32 = arrow_type_to_json(&DataType::Date32).unwrap();
        assert_eq!(date32.r#type, "date32");

        let date64 = arrow_type_to_json(&DataType::Date64).unwrap();
        assert_eq!(date64.r#type, "date64");

        // Test FixedSizeBinary
        let fixed_binary = arrow_type_to_json(&DataType::FixedSizeBinary(16)).unwrap();
        assert_eq!(fixed_binary.r#type, "fixed_size_binary");
        assert_eq!(fixed_binary.length, Some(16));

        // Test Float16
        let float16 = arrow_type_to_json(&DataType::Float16).unwrap();
        assert_eq!(float16.r#type, "float16");
    }

    /// Verify that convert_json_arrow_type (deserialization) is the inverse of
    /// arrow_type_to_json (serialization) for the types listed in `cases`.
    ///
    /// Timestamp and Duration are only exercised with the microsecond unit
    /// (and no timezone), which is what convert_json_arrow_type produces.
    #[test]
    fn test_json_arrow_type_roundtrip() {
        use arrow::datatypes::Field;

        let cases: Vec<DataType> = vec![
            // Scalars
            DataType::Null,
            DataType::Boolean,
            DataType::Int8,
            DataType::UInt8,
            DataType::Int16,
            DataType::UInt16,
            DataType::Int32,
            DataType::UInt32,
            DataType::Int64,
            DataType::UInt64,
            DataType::Float16,
            DataType::Float32,
            DataType::Float64,
            DataType::Utf8,
            DataType::LargeUtf8,
            DataType::Binary,
            DataType::LargeBinary,
            DataType::Date32,
            DataType::Date64,
            DataType::FixedSizeBinary(16),
            // Decimal types with positive and negative scales
            DataType::Decimal32(10, -2),
            DataType::Decimal32(9, 3),
            DataType::Decimal64(18, -5),
            DataType::Decimal64(10, 4),
            DataType::Decimal128(9, -2),
            DataType::Decimal128(38, 10),
            DataType::Decimal256(38, 10),
            DataType::Decimal256(76, -10),
            // Timestamp and Duration
            DataType::Timestamp(arrow::datatypes::TimeUnit::Microsecond, None),
            DataType::Duration(arrow::datatypes::TimeUnit::Microsecond),
            // Nested
            DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
            DataType::LargeList(Arc::new(Field::new("item", DataType::Utf8, true))),
            DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, false)), 128),
            DataType::Struct(
                vec![
                    Field::new("a", DataType::Int64, false),
                    Field::new("b", DataType::Utf8, true),
                ]
                .into(),
            ),
            // Map
            DataType::Map(
                Arc::new(Field::new(
                    "entries",
                    DataType::Struct(
                        vec![
                            Field::new("keys", DataType::Utf8, false),
                            Field::new("values", DataType::Int32, true),
                        ]
                        .into(),
                    ),
                    false,
                )),
                false,
            ),
        ];

        for dt in &cases {
            let json = arrow_type_to_json(dt)
                .unwrap_or_else(|e| panic!("arrow_type_to_json failed for {:?}: {}", dt, e));
            let back = convert_json_arrow_type(&json)
                .unwrap_or_else(|e| panic!("convert_json_arrow_type failed for {:?}: {}", dt, e));
            assert_eq!(&back, dt, "Roundtrip mismatch for {:?}: got {:?}", dt, back);
        }
    }

    /// Decimal round trip through the `precision * 1000 + scale` encoding,
    /// covering two negative-scale cases and one positive-scale case.
    #[test]
    fn test_decimal_negative_scale_roundtrip() {
        // Explicitly test the cases requested by reviewer
        let cases = vec![
            DataType::Decimal32(10, -2),
            DataType::Decimal128(9, -2),
            DataType::Decimal256(38, 10),
        ];
        for dt in &cases {
            let json = arrow_type_to_json(dt).unwrap();
            let back = convert_json_arrow_type(&json).unwrap();
            assert_eq!(&back, dt, "Decimal roundtrip failed for {:?}", dt);
        }
    }

    /// Schema-level metadata survives Arrow -> JSON -> Arrow.
    #[test]
    fn test_schema_with_metadata_roundtrip() {
        let mut metadata = HashMap::new();
        metadata.insert("key1".to_string(), "value1".to_string());
        metadata.insert("key2".to_string(), "value2".to_string());

        let arrow_schema = ArrowSchema::new_with_metadata(
            vec![
                Field::new("id", DataType::Int32, false),
                Field::new("name", DataType::Utf8, true),
            ],
            metadata.clone(),
        );

        let json_schema = arrow_schema_to_json(&arrow_schema).unwrap();
        assert_eq!(json_schema.metadata.as_ref().unwrap(), &metadata);

        let roundtrip = convert_json_arrow_schema(&json_schema).unwrap();
        assert_eq!(roundtrip.metadata(), &metadata);
    }

    /// A dictionary type is serialized as its value type; the key type is
    /// dropped.
    #[test]
    fn test_dictionary_type_unwraps_to_value_type() {
        let dict_type = DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
        let json = arrow_type_to_json(&dict_type).unwrap();
        assert_eq!(json.r#type, "utf8");
    }

    /// A map with `keys_sorted = true` is rejected, and the error message
    /// mentions the flag.
    #[test]
    fn test_map_keys_sorted_unsupported() {
        let map_type = DataType::Map(
            Arc::new(Field::new(
                "entries",
                DataType::Struct(
                    vec![
                        Field::new("keys", DataType::Utf8, false),
                        Field::new("values", DataType::Int32, true),
                    ]
                    .into(),
                ),
                false,
            )),
            true, // keys_sorted = true
        );
        let result = arrow_type_to_json(&map_type);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("keys_sorted=true"));
    }

    /// Arrow types without a JSON representation produce an error rather
    /// than a lossy conversion.
    #[test]
    fn test_unsupported_types_error() {
        // RunEndEncoded
        let ree = DataType::RunEndEncoded(
            Arc::new(Field::new("run_ends", DataType::Int32, false)),
            Arc::new(Field::new("values", DataType::Utf8, true)),
        );
        assert!(arrow_type_to_json(&ree).is_err());

        // ListView
        let lv = DataType::ListView(Arc::new(Field::new("item", DataType::Int32, true)));
        assert!(arrow_type_to_json(&lv).is_err());

        // LargeListView
        let llv = DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true)));
        assert!(arrow_type_to_json(&llv).is_err());

        // Utf8View / BinaryView
        assert!(arrow_type_to_json(&DataType::Utf8View).is_err());
        assert!(arrow_type_to_json(&DataType::BinaryView).is_err());
    }

    /// `LargeList` round trip: the JSON type name is `large_list` and the
    /// decoded type equals the original.
    #[test]
    fn test_large_list_roundtrip() {
        let inner_field = Field::new("item", DataType::Float64, true);
        let large_list = DataType::LargeList(Arc::new(inner_field));

        let json = arrow_type_to_json(&large_list).unwrap();
        assert_eq!(json.r#type, "large_list");

        let back = convert_json_arrow_type(&json).unwrap();
        assert_eq!(back, large_list);
    }

    /// Custom field-level metadata survives a schema round trip.
    #[test]
    fn test_field_with_metadata_roundtrip() {
        let mut field_meta = HashMap::new();
        field_meta.insert("custom_key".to_string(), "custom_val".to_string());

        let field = Field::new("col", DataType::Int64, false).with_metadata(field_meta.clone());
        let schema = ArrowSchema::new(vec![field]);

        let json_schema = arrow_schema_to_json(&schema).unwrap();
        let roundtrip = convert_json_arrow_schema(&json_schema).unwrap();
        assert_eq!(roundtrip.field(0).metadata(), &field_meta);
    }

    /// Metadata on a list's inner field is carried into the JSON child field.
    #[test]
    fn test_nested_list_with_field_metadata() {
        let mut meta = HashMap::new();
        meta.insert("encoding".to_string(), "delta".to_string());

        let inner = Field::new("item", DataType::Int32, true).with_metadata(meta.clone());
        let list_type = DataType::List(Arc::new(inner));

        let json = arrow_type_to_json(&list_type).unwrap();
        let fields = json.fields.as_ref().unwrap();
        assert_eq!(fields[0].metadata.as_ref().unwrap(), &meta);
    }
}