1use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
10use lance_core::{Error, Result};
11use lance_namespace_reqwest_client::models::{JsonArrowDataType, JsonArrowField, JsonArrowSchema};
12use snafu::Location;
13
14pub fn arrow_schema_to_json(arrow_schema: &ArrowSchema) -> Result<JsonArrowSchema> {
16 let fields: Result<Vec<JsonArrowField>> = arrow_schema
17 .fields()
18 .iter()
19 .map(|f| arrow_field_to_json(f.as_ref()))
20 .collect();
21
22 let metadata = if arrow_schema.metadata().is_empty() {
23 None
24 } else {
25 Some(arrow_schema.metadata().clone())
26 };
27
28 Ok(JsonArrowSchema {
29 fields: fields?,
30 metadata,
31 })
32}
33
34fn arrow_field_to_json(arrow_field: &Field) -> Result<JsonArrowField> {
36 let data_type = arrow_type_to_json(arrow_field.data_type())?;
37
38 Ok(JsonArrowField {
39 name: arrow_field.name().clone(),
40 nullable: arrow_field.is_nullable(),
41 r#type: Box::new(data_type),
42 metadata: if arrow_field.metadata().is_empty() {
43 None
44 } else {
45 Some(arrow_field.metadata().clone())
46 },
47 })
48}
49
50fn arrow_type_to_json(data_type: &DataType) -> Result<JsonArrowDataType> {
52 match data_type {
53 DataType::Null => Ok(JsonArrowDataType::new("null".to_string())),
55 DataType::Boolean => Ok(JsonArrowDataType::new("bool".to_string())),
56 DataType::Int8 => Ok(JsonArrowDataType::new("int8".to_string())),
57 DataType::UInt8 => Ok(JsonArrowDataType::new("uint8".to_string())),
58 DataType::Int16 => Ok(JsonArrowDataType::new("int16".to_string())),
59 DataType::UInt16 => Ok(JsonArrowDataType::new("uint16".to_string())),
60 DataType::Int32 => Ok(JsonArrowDataType::new("int32".to_string())),
61 DataType::UInt32 => Ok(JsonArrowDataType::new("uint32".to_string())),
62 DataType::Int64 => Ok(JsonArrowDataType::new("int64".to_string())),
63 DataType::UInt64 => Ok(JsonArrowDataType::new("uint64".to_string())),
64 DataType::Float16 => Ok(JsonArrowDataType::new("float16".to_string())),
65 DataType::Float32 => Ok(JsonArrowDataType::new("float32".to_string())),
66 DataType::Float64 => Ok(JsonArrowDataType::new("float64".to_string())),
67 DataType::Decimal32(precision, scale) => {
68 let mut dt = JsonArrowDataType::new("decimal32".to_string());
69 dt.length = Some(*precision as i64 * 1000 + *scale as i64); Ok(dt)
71 }
72 DataType::Decimal64(precision, scale) => {
73 let mut dt = JsonArrowDataType::new("decimal64".to_string());
74 dt.length = Some(*precision as i64 * 1000 + *scale as i64); Ok(dt)
76 }
77 DataType::Decimal128(precision, scale) => {
78 let mut dt = JsonArrowDataType::new("decimal128".to_string());
79 dt.length = Some(*precision as i64 * 1000 + *scale as i64); Ok(dt)
81 }
82 DataType::Decimal256(precision, scale) => {
83 let mut dt = JsonArrowDataType::new("decimal256".to_string());
84 dt.length = Some(*precision as i64 * 1000 + *scale as i64); Ok(dt)
86 }
87 DataType::Date32 => Ok(JsonArrowDataType::new("date32".to_string())),
88 DataType::Date64 => Ok(JsonArrowDataType::new("date64".to_string())),
89 DataType::Time32(_) => Ok(JsonArrowDataType::new("time32".to_string())),
90 DataType::Time64(_) => Ok(JsonArrowDataType::new("time64".to_string())),
91 DataType::Timestamp(_, _tz) => {
92 Ok(JsonArrowDataType::new("timestamp".to_string()))
94 }
95 DataType::Duration(_) => Ok(JsonArrowDataType::new("duration".to_string())),
96 DataType::Interval(_) => Ok(JsonArrowDataType::new("interval".to_string())),
97
98 DataType::Utf8 => Ok(JsonArrowDataType::new("utf8".to_string())),
100 DataType::LargeUtf8 => Ok(JsonArrowDataType::new("large_utf8".to_string())),
101 DataType::Binary => Ok(JsonArrowDataType::new("binary".to_string())),
102 DataType::LargeBinary => Ok(JsonArrowDataType::new("large_binary".to_string())),
103 DataType::FixedSizeBinary(size) => {
104 let mut dt = JsonArrowDataType::new("fixed_size_binary".to_string());
105 dt.length = Some(*size as i64);
106 Ok(dt)
107 }
108
109 DataType::List(field) => {
111 let inner_type = arrow_type_to_json(field.data_type())?;
112 let inner_field = JsonArrowField {
113 name: field.name().clone(),
114 nullable: field.is_nullable(),
115 r#type: Box::new(inner_type),
116 metadata: if field.metadata().is_empty() {
117 None
118 } else {
119 Some(field.metadata().clone())
120 },
121 };
122 Ok(JsonArrowDataType {
123 r#type: "list".to_string(),
124 fields: Some(vec![inner_field]),
125 length: None,
126 })
127 }
128 DataType::LargeList(field) => {
129 let inner_type = arrow_type_to_json(field.data_type())?;
130 let inner_field = JsonArrowField {
131 name: field.name().clone(),
132 nullable: field.is_nullable(),
133 r#type: Box::new(inner_type),
134 metadata: if field.metadata().is_empty() {
135 None
136 } else {
137 Some(field.metadata().clone())
138 },
139 };
140 Ok(JsonArrowDataType {
141 r#type: "large_list".to_string(),
142 fields: Some(vec![inner_field]),
143 length: None,
144 })
145 }
146 DataType::FixedSizeList(field, size) => {
147 let inner_type = arrow_type_to_json(field.data_type())?;
148 let inner_field = JsonArrowField {
149 name: field.name().clone(),
150 nullable: field.is_nullable(),
151 r#type: Box::new(inner_type),
152 metadata: if field.metadata().is_empty() {
153 None
154 } else {
155 Some(field.metadata().clone())
156 },
157 };
158 Ok(JsonArrowDataType {
159 r#type: "fixed_size_list".to_string(),
160 fields: Some(vec![inner_field]),
161 length: Some(*size as i64),
162 })
163 }
164 DataType::Struct(fields) => {
165 let json_fields: Result<Vec<JsonArrowField>> = fields
166 .iter()
167 .map(|f| arrow_field_to_json(f.as_ref()))
168 .collect();
169 Ok(JsonArrowDataType {
170 r#type: "struct".to_string(),
171 fields: Some(json_fields?),
172 length: None,
173 })
174 }
175 DataType::Union(_, _) => {
176 Ok(JsonArrowDataType::new("union".to_string()))
178 }
179 DataType::Dictionary(_, value_type) => {
180 arrow_type_to_json(value_type)
182 }
183
184 DataType::Map(entries_field, keys_sorted) => {
185 if *keys_sorted {
186 return Err(Error::Namespace {
187 source: format!(
188 "Map types with keys_sorted=true are not yet supported for JSON conversion: {:?}",
189 data_type
190 )
191 .into(),
192 location: Location::new(file!(), line!(), column!()),
193 });
194 }
195 let inner_type = arrow_type_to_json(entries_field.data_type())?;
196 let inner_field = JsonArrowField {
197 name: entries_field.name().clone(),
198 nullable: entries_field.is_nullable(),
199 r#type: Box::new(inner_type),
200 metadata: if entries_field.metadata().is_empty() {
201 None
202 } else {
203 Some(entries_field.metadata().clone())
204 },
205 };
206 Ok(JsonArrowDataType {
207 r#type: "map".to_string(),
208 fields: Some(vec![inner_field]),
209 length: None,
210 })
211 }
212
213 DataType::RunEndEncoded(_, _) => Err(Error::Namespace {
215 source: format!(
216 "RunEndEncoded type is not yet supported for JSON conversion: {:?}",
217 data_type
218 )
219 .into(),
220 location: Location::new(file!(), line!(), column!()),
221 }),
222 DataType::ListView(_) | DataType::LargeListView(_) => Err(Error::Namespace {
223 source: format!(
224 "ListView types are not yet supported for JSON conversion: {:?}",
225 data_type
226 )
227 .into(),
228 location: Location::new(file!(), line!(), column!()),
229 }),
230 DataType::Utf8View | DataType::BinaryView => Err(Error::Namespace {
231 source: format!(
232 "View types are not yet supported for JSON conversion: {:?}",
233 data_type
234 )
235 .into(),
236 location: Location::new(file!(), line!(), column!()),
237 }),
238 }
239}
240
241pub fn convert_json_arrow_schema(json_schema: &JsonArrowSchema) -> Result<ArrowSchema> {
243 let fields: Result<Vec<Field>> = json_schema
244 .fields
245 .iter()
246 .map(convert_json_arrow_field)
247 .collect();
248
249 let metadata = json_schema.metadata.as_ref().cloned().unwrap_or_default();
250
251 Ok(ArrowSchema::new_with_metadata(fields?, metadata))
252}
253
254pub fn convert_json_arrow_field(json_field: &JsonArrowField) -> Result<Field> {
256 let data_type = convert_json_arrow_type(&json_field.r#type)?;
257 let nullable = json_field.nullable;
258
259 let field = Field::new(&json_field.name, data_type, nullable);
260 Ok(match json_field.metadata.as_ref() {
261 Some(metadata) => field.with_metadata(metadata.clone()),
262 None => field,
263 })
264}
265
266pub fn convert_json_arrow_type(json_type: &JsonArrowDataType) -> Result<DataType> {
268 let type_name = json_type.r#type.to_lowercase();
269
270 match type_name.as_str() {
271 "null" => Ok(DataType::Null),
272 "bool" | "boolean" => Ok(DataType::Boolean),
273 "int8" => Ok(DataType::Int8),
274 "uint8" => Ok(DataType::UInt8),
275 "int16" => Ok(DataType::Int16),
276 "uint16" => Ok(DataType::UInt16),
277 "int32" => Ok(DataType::Int32),
278 "uint32" => Ok(DataType::UInt32),
279 "int64" => Ok(DataType::Int64),
280 "uint64" => Ok(DataType::UInt64),
281 "float32" => Ok(DataType::Float32),
282 "float64" => Ok(DataType::Float64),
283 "utf8" => Ok(DataType::Utf8),
284 "binary" => Ok(DataType::Binary),
285 _ => Err(Error::Namespace {
286 source: format!("Unsupported Arrow type: {}", type_name).into(),
287 location: Location::new(file!(), line!(), column!()),
288 }),
289 }
290}
291
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Field-level extension metadata must survive Arrow -> JSON -> Arrow.
    #[test]
    fn test_extension_metadata_preserved_in_json_roundtrip() {
        const ARROW_EXT_NAME_KEY: &str = "ARROW:extension:name";
        const LANCE_JSON_EXT_NAME: &str = "lance.json";

        let mut extension_metadata = HashMap::new();
        extension_metadata.insert(
            ARROW_EXT_NAME_KEY.to_string(),
            LANCE_JSON_EXT_NAME.to_string(),
        );
        let annotated_field =
            Field::new("meta", DataType::Binary, true).with_metadata(extension_metadata);
        let original_schema = ArrowSchema::new(vec![
            Field::new("id", DataType::Int32, false),
            annotated_field,
        ]);

        // Arrow -> JSON keeps the extension name on the field.
        let as_json = arrow_schema_to_json(&original_schema).unwrap();
        let json_meta_field = as_json.fields.iter().find(|f| f.name == "meta").unwrap();
        let json_metadata = json_meta_field.metadata.as_ref().unwrap();
        assert!(json_metadata.contains_key(ARROW_EXT_NAME_KEY));

        // JSON -> Arrow restores it.
        let restored_schema = convert_json_arrow_schema(&as_json).unwrap();
        let restored_field = restored_schema.field_with_name("meta").unwrap();
        assert_eq!(
            restored_field.metadata().get(ARROW_EXT_NAME_KEY),
            Some(&LANCE_JSON_EXT_NAME.to_string())
        );
    }

    /// Simple JSON type names map to the matching Arrow data types.
    #[test]
    fn test_convert_basic_types() {
        let cases = vec![
            ("int32", DataType::Int32),
            ("utf8", DataType::Utf8),
            ("float64", DataType::Float64),
            ("binary", DataType::Binary),
        ];

        for (type_name, expected) in cases {
            let json_type = JsonArrowDataType::new(type_name.to_string());
            let converted = convert_json_arrow_type(&json_type).unwrap();
            assert_eq!(converted, expected);
        }
    }

    /// A JSON field converts to an Arrow field with the same name, type and nullability.
    #[test]
    fn test_convert_field() {
        let json_field = JsonArrowField {
            name: "test_field".to_string(),
            r#type: Box::new(JsonArrowDataType::new("int32".to_string())),
            nullable: false,
            metadata: None,
        };

        let arrow_field = convert_json_arrow_field(&json_field).unwrap();
        assert_eq!(arrow_field.name(), "test_field");
        assert_eq!(arrow_field.data_type(), &DataType::Int32);
        assert!(!arrow_field.is_nullable());
    }

    /// A JSON schema converts to an Arrow schema with the same fields and metadata.
    #[test]
    fn test_convert_schema() {
        let id_field = JsonArrowField {
            name: "id".to_string(),
            r#type: Box::new(JsonArrowDataType::new("int32".to_string())),
            nullable: false,
            metadata: None,
        };
        let name_field = JsonArrowField {
            name: "name".to_string(),
            r#type: Box::new(JsonArrowDataType::new("utf8".to_string())),
            nullable: true,
            metadata: None,
        };

        let schema_metadata = HashMap::from([("key".to_string(), "value".to_string())]);
        let json_schema = JsonArrowSchema {
            fields: vec![id_field, name_field],
            metadata: Some(schema_metadata.clone()),
        };

        let arrow_schema = convert_json_arrow_schema(&json_schema).unwrap();
        assert_eq!(arrow_schema.fields().len(), 2);
        assert_eq!(arrow_schema.field(0).name(), "id");
        assert_eq!(arrow_schema.field(1).name(), "name");
        assert_eq!(arrow_schema.metadata(), &schema_metadata);
    }

    /// Unknown type names are reported as unsupported.
    #[test]
    fn test_unsupported_type() {
        let unknown = JsonArrowDataType::new("unsupported".to_string());
        let outcome = convert_json_arrow_type(&unknown);
        assert!(outcome.is_err());

        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Unsupported Arrow type"));
    }

    /// A list is encoded as "list" with its item field as the only child.
    #[test]
    fn test_list_type() {
        let item = Field::new("item", DataType::Int32, true);
        let list = DataType::List(Arc::new(item));

        let json_type = arrow_type_to_json(&list).unwrap();
        assert_eq!(json_type.r#type, "list");
        assert!(json_type.fields.is_some());

        let children = json_type.fields.unwrap();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].name, "item");
        assert_eq!(children[0].r#type.r#type, "int32");
    }

    /// A struct is encoded as "struct" with one child per struct field.
    #[test]
    fn test_struct_type() {
        let members = vec![
            Field::new("id", DataType::Int64, false),
            Field::new("name", DataType::Utf8, true),
        ];
        let struct_type = DataType::Struct(members.into());

        let json_type = arrow_type_to_json(&struct_type).unwrap();
        assert_eq!(json_type.r#type, "struct");
        assert!(json_type.fields.is_some());

        let children = json_type.fields.unwrap();
        assert_eq!(children.len(), 2);
        assert_eq!(children[0].name, "id");
        assert_eq!(children[0].r#type.r#type, "int64");
        assert_eq!(children[1].name, "name");
        assert_eq!(children[1].r#type.r#type, "utf8");
    }

    /// A fixed-size list records its size in `length` and its item as the only child.
    #[test]
    fn test_fixed_size_list_type() {
        let item = Field::new("item", DataType::Float32, false);
        let fixed_list = DataType::FixedSizeList(Arc::new(item), 3);

        let json_type = arrow_type_to_json(&fixed_list).unwrap();
        assert_eq!(json_type.r#type, "fixed_size_list");
        assert_eq!(json_type.length, Some(3));
        assert!(json_type.fields.is_some());

        let children = json_type.fields.unwrap();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].r#type.r#type, "float32");
    }

    /// Nested types are converted recursively (a list inside a struct).
    #[test]
    fn test_nested_struct_with_list() {
        let tags_type = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
        let members = vec![
            Field::new("id", DataType::Int32, false),
            Field::new("tags", tags_type, true),
        ];
        let struct_type = DataType::Struct(members.into());

        let json_type = arrow_type_to_json(&struct_type).unwrap();
        assert_eq!(json_type.r#type, "struct");

        let children = json_type.fields.unwrap();
        assert_eq!(children.len(), 2);
        assert_eq!(children[0].name, "id");
        assert_eq!(children[1].name, "tags");
        assert_eq!(children[1].r#type.r#type, "list");

        // The list child carries its own item field.
        let list_children = children[1].r#type.fields.as_ref().unwrap();
        assert_eq!(list_children.len(), 1);
        assert_eq!(list_children[0].r#type.r#type, "utf8");
    }

    /// An unsorted map is encoded as "map" with its entries struct as the only child.
    #[test]
    fn test_map_type_supported() {
        let entries_type = DataType::Struct(
            vec![
                Field::new("keys", DataType::Utf8, false),
                Field::new("values", DataType::Int32, true),
            ]
            .into(),
        );
        let entries = Field::new("entries", entries_type, false);
        let map_type = DataType::Map(Arc::new(entries), false);

        let outcome = arrow_type_to_json(&map_type);
        assert!(outcome.is_ok());

        let json_type = outcome.unwrap();
        assert_eq!(json_type.r#type, "map");
        assert!(json_type.fields.is_some());

        let children = json_type.fields.unwrap();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].name, "entries");
        assert_eq!(children[0].r#type.r#type, "struct");
    }

    /// Spot checks for date, fixed-size binary and half-float encodings.
    #[test]
    fn test_additional_types() {
        let name_only_cases = vec![
            (DataType::Date32, "date32"),
            (DataType::Date64, "date64"),
            (DataType::Float16, "float16"),
        ];
        for (arrow_type, expected_name) in name_only_cases {
            let json_type = arrow_type_to_json(&arrow_type).unwrap();
            assert_eq!(json_type.r#type, expected_name);
        }

        // Fixed-size binary also records its byte width.
        let fixed_binary = arrow_type_to_json(&DataType::FixedSizeBinary(16)).unwrap();
        assert_eq!(fixed_binary.r#type, "fixed_size_binary");
        assert_eq!(fixed_binary.length, Some(16));
    }
}