Skip to main content

lance_namespace/
schema.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Schema conversion utilities for Lance Namespace.
5//!
6//! This module provides functions to convert between JsonArrow schema representations
7//! and Arrow schema types.
8
9use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
10use lance_core::{Error, Result};
11use lance_namespace_reqwest_client::models::{JsonArrowDataType, JsonArrowField, JsonArrowSchema};
12use snafu::Location;
13
14/// Convert Arrow Schema to JsonArrowSchema
15pub fn arrow_schema_to_json(arrow_schema: &ArrowSchema) -> Result<JsonArrowSchema> {
16    let fields: Result<Vec<JsonArrowField>> = arrow_schema
17        .fields()
18        .iter()
19        .map(|f| arrow_field_to_json(f.as_ref()))
20        .collect();
21
22    let metadata = if arrow_schema.metadata().is_empty() {
23        None
24    } else {
25        Some(arrow_schema.metadata().clone())
26    };
27
28    Ok(JsonArrowSchema {
29        fields: fields?,
30        metadata,
31    })
32}
33
34/// Convert Arrow Field to JsonArrowField
35fn arrow_field_to_json(arrow_field: &Field) -> Result<JsonArrowField> {
36    let data_type = arrow_type_to_json(arrow_field.data_type())?;
37
38    Ok(JsonArrowField {
39        name: arrow_field.name().clone(),
40        nullable: arrow_field.is_nullable(),
41        r#type: Box::new(data_type),
42        metadata: if arrow_field.metadata().is_empty() {
43            None
44        } else {
45            Some(arrow_field.metadata().clone())
46        },
47    })
48}
49
50/// Convert Arrow DataType to JsonArrowDataType
51fn arrow_type_to_json(data_type: &DataType) -> Result<JsonArrowDataType> {
52    match data_type {
53        // Primitive types
54        DataType::Null => Ok(JsonArrowDataType::new("null".to_string())),
55        DataType::Boolean => Ok(JsonArrowDataType::new("bool".to_string())),
56        DataType::Int8 => Ok(JsonArrowDataType::new("int8".to_string())),
57        DataType::UInt8 => Ok(JsonArrowDataType::new("uint8".to_string())),
58        DataType::Int16 => Ok(JsonArrowDataType::new("int16".to_string())),
59        DataType::UInt16 => Ok(JsonArrowDataType::new("uint16".to_string())),
60        DataType::Int32 => Ok(JsonArrowDataType::new("int32".to_string())),
61        DataType::UInt32 => Ok(JsonArrowDataType::new("uint32".to_string())),
62        DataType::Int64 => Ok(JsonArrowDataType::new("int64".to_string())),
63        DataType::UInt64 => Ok(JsonArrowDataType::new("uint64".to_string())),
64        DataType::Float16 => Ok(JsonArrowDataType::new("float16".to_string())),
65        DataType::Float32 => Ok(JsonArrowDataType::new("float32".to_string())),
66        DataType::Float64 => Ok(JsonArrowDataType::new("float64".to_string())),
67        DataType::Decimal32(precision, scale) => {
68            let mut dt = JsonArrowDataType::new("decimal32".to_string());
69            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
70            Ok(dt)
71        }
72        DataType::Decimal64(precision, scale) => {
73            let mut dt = JsonArrowDataType::new("decimal64".to_string());
74            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
75            Ok(dt)
76        }
77        DataType::Decimal128(precision, scale) => {
78            let mut dt = JsonArrowDataType::new("decimal128".to_string());
79            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
80            Ok(dt)
81        }
82        DataType::Decimal256(precision, scale) => {
83            let mut dt = JsonArrowDataType::new("decimal256".to_string());
84            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
85            Ok(dt)
86        }
87        DataType::Date32 => Ok(JsonArrowDataType::new("date32".to_string())),
88        DataType::Date64 => Ok(JsonArrowDataType::new("date64".to_string())),
89        DataType::Time32(_) => Ok(JsonArrowDataType::new("time32".to_string())),
90        DataType::Time64(_) => Ok(JsonArrowDataType::new("time64".to_string())),
91        DataType::Timestamp(_, _tz) => {
92            // TODO: We could encode timezone info if needed
93            Ok(JsonArrowDataType::new("timestamp".to_string()))
94        }
95        DataType::Duration(_) => Ok(JsonArrowDataType::new("duration".to_string())),
96        DataType::Interval(_) => Ok(JsonArrowDataType::new("interval".to_string())),
97
98        // String and Binary types
99        DataType::Utf8 => Ok(JsonArrowDataType::new("utf8".to_string())),
100        DataType::LargeUtf8 => Ok(JsonArrowDataType::new("large_utf8".to_string())),
101        DataType::Binary => Ok(JsonArrowDataType::new("binary".to_string())),
102        DataType::LargeBinary => Ok(JsonArrowDataType::new("large_binary".to_string())),
103        DataType::FixedSizeBinary(size) => {
104            let mut dt = JsonArrowDataType::new("fixed_size_binary".to_string());
105            dt.length = Some(*size as i64);
106            Ok(dt)
107        }
108
109        // Nested types
110        DataType::List(field) => {
111            let inner_type = arrow_type_to_json(field.data_type())?;
112            let inner_field = JsonArrowField {
113                name: field.name().clone(),
114                nullable: field.is_nullable(),
115                r#type: Box::new(inner_type),
116                metadata: if field.metadata().is_empty() {
117                    None
118                } else {
119                    Some(field.metadata().clone())
120                },
121            };
122            Ok(JsonArrowDataType {
123                r#type: "list".to_string(),
124                fields: Some(vec![inner_field]),
125                length: None,
126            })
127        }
128        DataType::LargeList(field) => {
129            let inner_type = arrow_type_to_json(field.data_type())?;
130            let inner_field = JsonArrowField {
131                name: field.name().clone(),
132                nullable: field.is_nullable(),
133                r#type: Box::new(inner_type),
134                metadata: if field.metadata().is_empty() {
135                    None
136                } else {
137                    Some(field.metadata().clone())
138                },
139            };
140            Ok(JsonArrowDataType {
141                r#type: "large_list".to_string(),
142                fields: Some(vec![inner_field]),
143                length: None,
144            })
145        }
146        DataType::FixedSizeList(field, size) => {
147            let inner_type = arrow_type_to_json(field.data_type())?;
148            let inner_field = JsonArrowField {
149                name: field.name().clone(),
150                nullable: field.is_nullable(),
151                r#type: Box::new(inner_type),
152                metadata: if field.metadata().is_empty() {
153                    None
154                } else {
155                    Some(field.metadata().clone())
156                },
157            };
158            Ok(JsonArrowDataType {
159                r#type: "fixed_size_list".to_string(),
160                fields: Some(vec![inner_field]),
161                length: Some(*size as i64),
162            })
163        }
164        DataType::Struct(fields) => {
165            let json_fields: Result<Vec<JsonArrowField>> = fields
166                .iter()
167                .map(|f| arrow_field_to_json(f.as_ref()))
168                .collect();
169            Ok(JsonArrowDataType {
170                r#type: "struct".to_string(),
171                fields: Some(json_fields?),
172                length: None,
173            })
174        }
175        DataType::Union(_, _) => {
176            // Union types are complex, for now we'll skip detailed conversion
177            Ok(JsonArrowDataType::new("union".to_string()))
178        }
179        DataType::Dictionary(_, value_type) => {
180            // For dictionary, return the value type
181            arrow_type_to_json(value_type)
182        }
183
184        DataType::Map(entries_field, keys_sorted) => {
185            if *keys_sorted {
186                return Err(Error::Namespace {
187                    source: format!(
188                        "Map types with keys_sorted=true are not yet supported for JSON conversion: {:?}",
189                        data_type
190                    )
191                        .into(),
192                    location: Location::new(file!(), line!(), column!()),
193                });
194            }
195            let inner_type = arrow_type_to_json(entries_field.data_type())?;
196            let inner_field = JsonArrowField {
197                name: entries_field.name().clone(),
198                nullable: entries_field.is_nullable(),
199                r#type: Box::new(inner_type),
200                metadata: if entries_field.metadata().is_empty() {
201                    None
202                } else {
203                    Some(entries_field.metadata().clone())
204                },
205            };
206            Ok(JsonArrowDataType {
207                r#type: "map".to_string(),
208                fields: Some(vec![inner_field]),
209                length: None,
210            })
211        }
212
213        // Unsupported types
214        DataType::RunEndEncoded(_, _) => Err(Error::Namespace {
215            source: format!(
216                "RunEndEncoded type is not yet supported for JSON conversion: {:?}",
217                data_type
218            )
219            .into(),
220            location: Location::new(file!(), line!(), column!()),
221        }),
222        DataType::ListView(_) | DataType::LargeListView(_) => Err(Error::Namespace {
223            source: format!(
224                "ListView types are not yet supported for JSON conversion: {:?}",
225                data_type
226            )
227            .into(),
228            location: Location::new(file!(), line!(), column!()),
229        }),
230        DataType::Utf8View | DataType::BinaryView => Err(Error::Namespace {
231            source: format!(
232                "View types are not yet supported for JSON conversion: {:?}",
233                data_type
234            )
235            .into(),
236            location: Location::new(file!(), line!(), column!()),
237        }),
238    }
239}
240
241/// Convert JsonArrowSchema to Arrow Schema
242pub fn convert_json_arrow_schema(json_schema: &JsonArrowSchema) -> Result<ArrowSchema> {
243    let fields: Result<Vec<Field>> = json_schema
244        .fields
245        .iter()
246        .map(convert_json_arrow_field)
247        .collect();
248
249    let metadata = json_schema.metadata.as_ref().cloned().unwrap_or_default();
250
251    Ok(ArrowSchema::new_with_metadata(fields?, metadata))
252}
253
254/// Convert JsonArrowField to Arrow Field
255pub fn convert_json_arrow_field(json_field: &JsonArrowField) -> Result<Field> {
256    let data_type = convert_json_arrow_type(&json_field.r#type)?;
257    let nullable = json_field.nullable;
258
259    let field = Field::new(&json_field.name, data_type, nullable);
260    Ok(match json_field.metadata.as_ref() {
261        Some(metadata) => field.with_metadata(metadata.clone()),
262        None => field,
263    })
264}
265
266/// Convert JsonArrowDataType to Arrow DataType
267pub fn convert_json_arrow_type(json_type: &JsonArrowDataType) -> Result<DataType> {
268    let type_name = json_type.r#type.to_lowercase();
269
270    match type_name.as_str() {
271        "null" => Ok(DataType::Null),
272        "bool" | "boolean" => Ok(DataType::Boolean),
273        "int8" => Ok(DataType::Int8),
274        "uint8" => Ok(DataType::UInt8),
275        "int16" => Ok(DataType::Int16),
276        "uint16" => Ok(DataType::UInt16),
277        "int32" => Ok(DataType::Int32),
278        "uint32" => Ok(DataType::UInt32),
279        "int64" => Ok(DataType::Int64),
280        "uint64" => Ok(DataType::UInt64),
281        "float32" => Ok(DataType::Float32),
282        "float64" => Ok(DataType::Float64),
283        "utf8" => Ok(DataType::Utf8),
284        "binary" => Ok(DataType::Binary),
285        _ => Err(Error::Namespace {
286            source: format!("Unsupported Arrow type: {}", type_name).into(),
287            location: Location::new(file!(), line!(), column!()),
288        }),
289    }
290}
291
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Build a metadata-free JSON field whose type is the bare type name `type_name`.
    fn plain_json_field(name: &str, type_name: &str, nullable: bool) -> JsonArrowField {
        JsonArrowField {
            name: name.to_string(),
            r#type: Box::new(JsonArrowDataType::new(type_name.to_string())),
            nullable,
            metadata: None,
        }
    }

    #[test]
    fn test_extension_metadata_preserved_in_json_roundtrip() {
        const ARROW_EXT_NAME_KEY: &str = "ARROW:extension:name";
        const LANCE_JSON_EXT_NAME: &str = "lance.json";

        let mut extension_metadata = HashMap::new();
        extension_metadata.insert(
            ARROW_EXT_NAME_KEY.to_string(),
            LANCE_JSON_EXT_NAME.to_string(),
        );
        let arrow_schema = ArrowSchema::new(vec![
            Field::new("id", DataType::Int32, false),
            Field::new("meta", DataType::Binary, true).with_metadata(extension_metadata),
        ]);

        // Arrow -> JSON keeps the extension name on the field.
        let json_schema = arrow_schema_to_json(&arrow_schema).unwrap();
        let json_meta = json_schema
            .fields
            .iter()
            .find(|f| f.name == "meta")
            .unwrap();
        let json_meta_metadata = json_meta.metadata.as_ref().unwrap();
        assert!(json_meta_metadata.contains_key(ARROW_EXT_NAME_KEY));

        // JSON -> Arrow hands the same value back.
        let roundtrip = convert_json_arrow_schema(&json_schema).unwrap();
        let roundtrip_meta = roundtrip.field_with_name("meta").unwrap();
        assert_eq!(
            roundtrip_meta
                .metadata()
                .get(ARROW_EXT_NAME_KEY)
                .map(String::as_str),
            Some(LANCE_JSON_EXT_NAME)
        );
    }

    #[test]
    fn test_convert_basic_types() {
        let cases = vec![
            ("int32", DataType::Int32),
            ("utf8", DataType::Utf8),
            ("float64", DataType::Float64),
            ("binary", DataType::Binary),
        ];

        for (type_name, expected) in cases {
            let json_type = JsonArrowDataType::new(type_name.to_string());
            let converted = convert_json_arrow_type(&json_type).unwrap();
            assert_eq!(converted, expected);
        }
    }

    #[test]
    fn test_convert_field() {
        let json_field = plain_json_field("test_field", "int32", false);

        let converted = convert_json_arrow_field(&json_field).unwrap();
        assert_eq!(converted.name(), "test_field");
        assert_eq!(converted.data_type(), &DataType::Int32);
        assert!(!converted.is_nullable());
    }

    #[test]
    fn test_convert_schema() {
        let metadata = HashMap::from([("key".to_string(), "value".to_string())]);
        let schema = JsonArrowSchema {
            fields: vec![
                plain_json_field("id", "int32", false),
                plain_json_field("name", "utf8", true),
            ],
            metadata: Some(metadata.clone()),
        };

        let converted = convert_json_arrow_schema(&schema).unwrap();
        assert_eq!(converted.fields().len(), 2);
        assert_eq!(converted.field(0).name(), "id");
        assert_eq!(converted.field(1).name(), "name");
        assert_eq!(converted.metadata(), &metadata);
    }

    #[test]
    fn test_unsupported_type() {
        let unsupported_type = JsonArrowDataType::new("unsupported".to_string());

        let err = convert_json_arrow_type(&unsupported_type).unwrap_err();
        assert!(err.to_string().contains("Unsupported Arrow type"));
    }

    #[test]
    fn test_list_type() {
        let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));

        let json_type = arrow_type_to_json(&list_type).unwrap();
        assert_eq!(json_type.r#type, "list");
        let children = json_type.fields.expect("list should carry its item field");
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].name, "item");
        assert_eq!(children[0].r#type.r#type, "int32");
    }

    #[test]
    fn test_struct_type() {
        let struct_type = DataType::Struct(
            vec![
                Field::new("id", DataType::Int64, false),
                Field::new("name", DataType::Utf8, true),
            ]
            .into(),
        );

        let json_type = arrow_type_to_json(&struct_type).unwrap();
        assert_eq!(json_type.r#type, "struct");
        let children = json_type.fields.expect("struct should carry its fields");
        assert_eq!(children.len(), 2);
        assert_eq!(children[0].name, "id");
        assert_eq!(children[0].r#type.r#type, "int64");
        assert_eq!(children[1].name, "name");
        assert_eq!(children[1].r#type.r#type, "utf8");
    }

    #[test]
    fn test_fixed_size_list_type() {
        let item = Field::new("item", DataType::Float32, false);
        let fixed_list_type = DataType::FixedSizeList(Arc::new(item), 3);

        let json_type = arrow_type_to_json(&fixed_list_type).unwrap();
        assert_eq!(json_type.r#type, "fixed_size_list");
        assert_eq!(json_type.length, Some(3));
        let children = json_type
            .fields
            .expect("fixed_size_list should carry its item field");
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].r#type.r#type, "float32");
    }

    #[test]
    fn test_nested_struct_with_list() {
        let tags_type = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
        let struct_type = DataType::Struct(
            vec![
                Field::new("id", DataType::Int32, false),
                Field::new("tags", tags_type, true),
            ]
            .into(),
        );

        let json_type = arrow_type_to_json(&struct_type).unwrap();
        assert_eq!(json_type.r#type, "struct");
        let children = json_type.fields.unwrap();
        assert_eq!(children.len(), 2);
        assert_eq!(children[0].name, "id");
        assert_eq!(children[1].name, "tags");
        assert_eq!(children[1].r#type.r#type, "list");

        // Check nested list structure
        let list_children = children[1].r#type.fields.as_ref().unwrap();
        assert_eq!(list_children.len(), 1);
        assert_eq!(list_children[0].r#type.r#type, "utf8");
    }

    #[test]
    fn test_map_type_supported() {
        let entries_type = DataType::Struct(
            vec![
                Field::new("keys", DataType::Utf8, false),
                Field::new("values", DataType::Int32, true),
            ]
            .into(),
        );
        let map_type = DataType::Map(Arc::new(Field::new("entries", entries_type, false)), false);

        let json_type = arrow_type_to_json(&map_type).expect("unsorted map should convert");
        assert_eq!(json_type.r#type, "map");

        let children = json_type.fields.expect("map should carry its entries field");
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].name, "entries");
        assert_eq!(children[0].r#type.r#type, "struct");
    }

    #[test]
    fn test_additional_types() {
        // Types that map to a bare name with no extra payload.
        let named = vec![
            (DataType::Date32, "date32"),
            (DataType::Date64, "date64"),
            (DataType::Float16, "float16"),
        ];
        for (arrow_type, expected_name) in named {
            let json_type = arrow_type_to_json(&arrow_type).unwrap();
            assert_eq!(json_type.r#type, expected_name);
        }

        // FixedSizeBinary also records its width in `length`.
        let fixed_binary = arrow_type_to_json(&DataType::FixedSizeBinary(16)).unwrap();
        assert_eq!(fixed_binary.r#type, "fixed_size_binary");
        assert_eq!(fixed_binary.length, Some(16));
    }
}