lance_namespace/
schema.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4//! Schema conversion utilities for Lance Namespace.
5//!
6//! This module provides functions to convert between JsonArrow schema representations
7//! and Arrow schema types.
8
9use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
10use lance_core::{Error, Result};
11use lance_namespace_reqwest_client::models::{JsonArrowDataType, JsonArrowField, JsonArrowSchema};
12use snafu::Location;
13
14/// Convert Arrow Schema to JsonArrowSchema
15pub fn arrow_schema_to_json(arrow_schema: &ArrowSchema) -> Result<JsonArrowSchema> {
16    let fields: Result<Vec<JsonArrowField>> = arrow_schema
17        .fields()
18        .iter()
19        .map(|f| arrow_field_to_json(f.as_ref()))
20        .collect();
21
22    let metadata = if arrow_schema.metadata().is_empty() {
23        None
24    } else {
25        Some(arrow_schema.metadata().clone())
26    };
27
28    Ok(JsonArrowSchema {
29        fields: fields?,
30        metadata,
31    })
32}
33
34/// Convert Arrow Field to JsonArrowField
35fn arrow_field_to_json(arrow_field: &Field) -> Result<JsonArrowField> {
36    let data_type = arrow_type_to_json(arrow_field.data_type())?;
37
38    Ok(JsonArrowField {
39        name: arrow_field.name().clone(),
40        nullable: arrow_field.is_nullable(),
41        r#type: Box::new(data_type),
42        metadata: if arrow_field.metadata().is_empty() {
43            None
44        } else {
45            Some(arrow_field.metadata().clone())
46        },
47    })
48}
49
50/// Convert Arrow DataType to JsonArrowDataType
51fn arrow_type_to_json(data_type: &DataType) -> Result<JsonArrowDataType> {
52    match data_type {
53        // Primitive types
54        DataType::Null => Ok(JsonArrowDataType::new("null".to_string())),
55        DataType::Boolean => Ok(JsonArrowDataType::new("bool".to_string())),
56        DataType::Int8 => Ok(JsonArrowDataType::new("int8".to_string())),
57        DataType::UInt8 => Ok(JsonArrowDataType::new("uint8".to_string())),
58        DataType::Int16 => Ok(JsonArrowDataType::new("int16".to_string())),
59        DataType::UInt16 => Ok(JsonArrowDataType::new("uint16".to_string())),
60        DataType::Int32 => Ok(JsonArrowDataType::new("int32".to_string())),
61        DataType::UInt32 => Ok(JsonArrowDataType::new("uint32".to_string())),
62        DataType::Int64 => Ok(JsonArrowDataType::new("int64".to_string())),
63        DataType::UInt64 => Ok(JsonArrowDataType::new("uint64".to_string())),
64        DataType::Float16 => Ok(JsonArrowDataType::new("float16".to_string())),
65        DataType::Float32 => Ok(JsonArrowDataType::new("float32".to_string())),
66        DataType::Float64 => Ok(JsonArrowDataType::new("float64".to_string())),
67        DataType::Decimal32(precision, scale) => {
68            let mut dt = JsonArrowDataType::new("decimal32".to_string());
69            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
70            Ok(dt)
71        }
72        DataType::Decimal64(precision, scale) => {
73            let mut dt = JsonArrowDataType::new("decimal64".to_string());
74            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
75            Ok(dt)
76        }
77        DataType::Decimal128(precision, scale) => {
78            let mut dt = JsonArrowDataType::new("decimal128".to_string());
79            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
80            Ok(dt)
81        }
82        DataType::Decimal256(precision, scale) => {
83            let mut dt = JsonArrowDataType::new("decimal256".to_string());
84            dt.length = Some(*precision as i64 * 1000 + *scale as i64); // Encode precision and scale
85            Ok(dt)
86        }
87        DataType::Date32 => Ok(JsonArrowDataType::new("date32".to_string())),
88        DataType::Date64 => Ok(JsonArrowDataType::new("date64".to_string())),
89        DataType::Time32(_) => Ok(JsonArrowDataType::new("time32".to_string())),
90        DataType::Time64(_) => Ok(JsonArrowDataType::new("time64".to_string())),
91        DataType::Timestamp(_, _tz) => {
92            // TODO: We could encode timezone info if needed
93            Ok(JsonArrowDataType::new("timestamp".to_string()))
94        }
95        DataType::Duration(_) => Ok(JsonArrowDataType::new("duration".to_string())),
96        DataType::Interval(_) => Ok(JsonArrowDataType::new("interval".to_string())),
97
98        // String and Binary types
99        DataType::Utf8 => Ok(JsonArrowDataType::new("utf8".to_string())),
100        DataType::LargeUtf8 => Ok(JsonArrowDataType::new("large_utf8".to_string())),
101        DataType::Binary => Ok(JsonArrowDataType::new("binary".to_string())),
102        DataType::LargeBinary => Ok(JsonArrowDataType::new("large_binary".to_string())),
103        DataType::FixedSizeBinary(size) => {
104            let mut dt = JsonArrowDataType::new("fixed_size_binary".to_string());
105            dt.length = Some(*size as i64);
106            Ok(dt)
107        }
108
109        // Nested types
110        DataType::List(field) => {
111            let inner_type = arrow_type_to_json(field.data_type())?;
112            let inner_field = JsonArrowField {
113                name: field.name().clone(),
114                nullable: field.is_nullable(),
115                r#type: Box::new(inner_type),
116                metadata: if field.metadata().is_empty() {
117                    None
118                } else {
119                    Some(field.metadata().clone())
120                },
121            };
122            Ok(JsonArrowDataType {
123                r#type: "list".to_string(),
124                fields: Some(vec![inner_field]),
125                length: None,
126            })
127        }
128        DataType::LargeList(field) => {
129            let inner_type = arrow_type_to_json(field.data_type())?;
130            let inner_field = JsonArrowField {
131                name: field.name().clone(),
132                nullable: field.is_nullable(),
133                r#type: Box::new(inner_type),
134                metadata: if field.metadata().is_empty() {
135                    None
136                } else {
137                    Some(field.metadata().clone())
138                },
139            };
140            Ok(JsonArrowDataType {
141                r#type: "large_list".to_string(),
142                fields: Some(vec![inner_field]),
143                length: None,
144            })
145        }
146        DataType::FixedSizeList(field, size) => {
147            let inner_type = arrow_type_to_json(field.data_type())?;
148            let inner_field = JsonArrowField {
149                name: field.name().clone(),
150                nullable: field.is_nullable(),
151                r#type: Box::new(inner_type),
152                metadata: if field.metadata().is_empty() {
153                    None
154                } else {
155                    Some(field.metadata().clone())
156                },
157            };
158            Ok(JsonArrowDataType {
159                r#type: "fixed_size_list".to_string(),
160                fields: Some(vec![inner_field]),
161                length: Some(*size as i64),
162            })
163        }
164        DataType::Struct(fields) => {
165            let json_fields: Result<Vec<JsonArrowField>> = fields
166                .iter()
167                .map(|f| arrow_field_to_json(f.as_ref()))
168                .collect();
169            Ok(JsonArrowDataType {
170                r#type: "struct".to_string(),
171                fields: Some(json_fields?),
172                length: None,
173            })
174        }
175        DataType::Union(_, _) => {
176            // Union types are complex, for now we'll skip detailed conversion
177            Ok(JsonArrowDataType::new("union".to_string()))
178        }
179        DataType::Dictionary(_, value_type) => {
180            // For dictionary, return the value type
181            arrow_type_to_json(value_type)
182        }
183
184        // Unsupported types
185        DataType::Map(_, _) => Err(Error::Namespace {
186            source: "Map type is not supported by Lance".into(),
187            location: Location::new(file!(), line!(), column!()),
188        }),
189        DataType::RunEndEncoded(_, _) => Err(Error::Namespace {
190            source: format!(
191                "RunEndEncoded type is not yet supported for JSON conversion: {:?}",
192                data_type
193            )
194            .into(),
195            location: Location::new(file!(), line!(), column!()),
196        }),
197        DataType::ListView(_) | DataType::LargeListView(_) => Err(Error::Namespace {
198            source: format!(
199                "ListView types are not yet supported for JSON conversion: {:?}",
200                data_type
201            )
202            .into(),
203            location: Location::new(file!(), line!(), column!()),
204        }),
205        DataType::Utf8View | DataType::BinaryView => Err(Error::Namespace {
206            source: format!(
207                "View types are not yet supported for JSON conversion: {:?}",
208                data_type
209            )
210            .into(),
211            location: Location::new(file!(), line!(), column!()),
212        }),
213    }
214}
215
216/// Convert JsonArrowSchema to Arrow Schema
217pub fn convert_json_arrow_schema(json_schema: &JsonArrowSchema) -> Result<ArrowSchema> {
218    let fields: Result<Vec<Field>> = json_schema
219        .fields
220        .iter()
221        .map(convert_json_arrow_field)
222        .collect();
223
224    let metadata = json_schema.metadata.as_ref().cloned().unwrap_or_default();
225
226    Ok(ArrowSchema::new_with_metadata(fields?, metadata))
227}
228
229/// Convert JsonArrowField to Arrow Field
230pub fn convert_json_arrow_field(json_field: &JsonArrowField) -> Result<Field> {
231    let data_type = convert_json_arrow_type(&json_field.r#type)?;
232    let nullable = json_field.nullable;
233
234    Ok(Field::new(&json_field.name, data_type, nullable))
235}
236
237/// Convert JsonArrowDataType to Arrow DataType
238pub fn convert_json_arrow_type(json_type: &JsonArrowDataType) -> Result<DataType> {
239    let type_name = json_type.r#type.to_lowercase();
240
241    match type_name.as_str() {
242        "null" => Ok(DataType::Null),
243        "bool" | "boolean" => Ok(DataType::Boolean),
244        "int8" => Ok(DataType::Int8),
245        "uint8" => Ok(DataType::UInt8),
246        "int16" => Ok(DataType::Int16),
247        "uint16" => Ok(DataType::UInt16),
248        "int32" => Ok(DataType::Int32),
249        "uint32" => Ok(DataType::UInt32),
250        "int64" => Ok(DataType::Int64),
251        "uint64" => Ok(DataType::UInt64),
252        "float32" => Ok(DataType::Float32),
253        "float64" => Ok(DataType::Float64),
254        "utf8" => Ok(DataType::Utf8),
255        "binary" => Ok(DataType::Binary),
256        _ => Err(Error::Namespace {
257            source: format!("Unsupported Arrow type: {}", type_name).into(),
258            location: Location::new(file!(), line!(), column!()),
259        }),
260    }
261}
262
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::datatypes::Field;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Build a JSON type that is described by its name alone.
    fn json_type(name: &str) -> JsonArrowDataType {
        JsonArrowDataType::new(name.to_string())
    }

    /// Build a JSON field of a name-only type, without metadata.
    fn json_field(name: &str, type_name: &str, nullable: bool) -> JsonArrowField {
        JsonArrowField {
            name: name.to_string(),
            r#type: Box::new(json_type(type_name)),
            nullable,
            metadata: None,
        }
    }

    #[test]
    fn test_convert_basic_types() {
        let cases = [
            ("int32", DataType::Int32),
            ("utf8", DataType::Utf8),
            ("float64", DataType::Float64),
            ("binary", DataType::Binary),
        ];

        for (name, expected) in &cases {
            let converted = convert_json_arrow_type(&json_type(name)).unwrap();
            assert_eq!(&converted, expected);
        }
    }

    #[test]
    fn test_convert_field() {
        let converted = convert_json_arrow_field(&json_field("test_field", "int32", false)).unwrap();

        assert_eq!(converted.name(), "test_field");
        assert_eq!(converted.data_type(), &DataType::Int32);
        assert!(!converted.is_nullable());
    }

    #[test]
    fn test_convert_schema() {
        let metadata: HashMap<String, String> =
            std::iter::once(("key".to_string(), "value".to_string())).collect();

        let json_schema = JsonArrowSchema {
            fields: vec![
                json_field("id", "int32", false),
                json_field("name", "utf8", true),
            ],
            metadata: Some(metadata.clone()),
        };

        let arrow_schema = convert_json_arrow_schema(&json_schema).unwrap();
        assert_eq!(arrow_schema.fields().len(), 2);
        assert_eq!(arrow_schema.field(0).name(), "id");
        assert_eq!(arrow_schema.field(1).name(), "name");
        assert_eq!(arrow_schema.metadata(), &metadata);
    }

    #[test]
    fn test_unsupported_type() {
        let error = convert_json_arrow_type(&json_type("unsupported")).unwrap_err();
        assert!(error.to_string().contains("Unsupported Arrow type"));
    }

    #[test]
    fn test_list_type() {
        let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));

        let converted = arrow_type_to_json(&list_type).unwrap();
        assert_eq!(converted.r#type, "list");

        let items = converted.fields.expect("list must carry its item field");
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].name, "item");
        assert_eq!(items[0].r#type.r#type, "int32");
    }

    #[test]
    fn test_struct_type() {
        let struct_type = DataType::Struct(
            vec![
                Field::new("id", DataType::Int64, false),
                Field::new("name", DataType::Utf8, true),
            ]
            .into(),
        );

        let converted = arrow_type_to_json(&struct_type).unwrap();
        assert_eq!(converted.r#type, "struct");

        let children = converted.fields.expect("struct must carry its child fields");
        assert_eq!(children.len(), 2);
        assert_eq!(children[0].name, "id");
        assert_eq!(children[0].r#type.r#type, "int64");
        assert_eq!(children[1].name, "name");
        assert_eq!(children[1].r#type.r#type, "utf8");
    }

    #[test]
    fn test_fixed_size_list_type() {
        let item = Field::new("item", DataType::Float32, false);
        let fixed_list_type = DataType::FixedSizeList(Arc::new(item), 3);

        let converted = arrow_type_to_json(&fixed_list_type).unwrap();
        assert_eq!(converted.r#type, "fixed_size_list");
        assert_eq!(converted.length, Some(3));

        let items = converted
            .fields
            .expect("fixed size list must carry its item field");
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].r#type.r#type, "float32");
    }

    #[test]
    fn test_nested_struct_with_list() {
        let tags_type = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
        let struct_type = DataType::Struct(
            vec![
                Field::new("id", DataType::Int32, false),
                Field::new("tags", tags_type, true),
            ]
            .into(),
        );

        let converted = arrow_type_to_json(&struct_type).unwrap();
        assert_eq!(converted.r#type, "struct");

        let children = converted.fields.unwrap();
        assert_eq!(children.len(), 2);
        assert_eq!(children[0].name, "id");
        assert_eq!(children[1].name, "tags");
        assert_eq!(children[1].r#type.r#type, "list");

        // The list child must in turn describe its item field.
        let items = children[1].r#type.fields.as_ref().unwrap();
        assert_eq!(items.len(), 1);
        assert_eq!(items[0].r#type.r#type, "utf8");
    }

    #[test]
    fn test_map_type_unsupported() {
        let entries = Field::new(
            "entries",
            DataType::Struct(
                vec![
                    Field::new("keys", DataType::Utf8, false),
                    Field::new("values", DataType::Int32, true),
                ]
                .into(),
            ),
            false,
        );
        let map_type = DataType::Map(Arc::new(entries), false);

        let error = arrow_type_to_json(&map_type).unwrap_err();
        assert!(error.to_string().contains("Map type is not supported"));
    }

    #[test]
    fn test_additional_types() {
        // Types that are described by their name alone.
        let named = [
            (DataType::Date32, "date32"),
            (DataType::Date64, "date64"),
            (DataType::Float16, "float16"),
        ];
        for (data_type, expected_name) in &named {
            let converted = arrow_type_to_json(data_type).unwrap();
            assert_eq!(converted.r#type, *expected_name);
        }

        // FixedSizeBinary additionally reports its byte width.
        let fixed_binary = arrow_type_to_json(&DataType::FixedSizeBinary(16)).unwrap();
        assert_eq!(fixed_binary.r#type, "fixed_size_binary");
        assert_eq!(fixed_binary.length, Some(16));
    }
}