1use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
10use lance_core::{Error, Result};
11use lance_namespace_reqwest_client::models::{JsonArrowDataType, JsonArrowField, JsonArrowSchema};
12use snafu::Location;
13
14pub fn arrow_schema_to_json(arrow_schema: &ArrowSchema) -> Result<JsonArrowSchema> {
16 let fields: Result<Vec<JsonArrowField>> = arrow_schema
17 .fields()
18 .iter()
19 .map(|f| arrow_field_to_json(f.as_ref()))
20 .collect();
21
22 let metadata = if arrow_schema.metadata().is_empty() {
23 None
24 } else {
25 Some(arrow_schema.metadata().clone())
26 };
27
28 Ok(JsonArrowSchema {
29 fields: fields?,
30 metadata,
31 })
32}
33
34fn arrow_field_to_json(arrow_field: &Field) -> Result<JsonArrowField> {
36 let data_type = arrow_type_to_json(arrow_field.data_type())?;
37
38 Ok(JsonArrowField {
39 name: arrow_field.name().clone(),
40 nullable: arrow_field.is_nullable(),
41 r#type: Box::new(data_type),
42 metadata: if arrow_field.metadata().is_empty() {
43 None
44 } else {
45 Some(arrow_field.metadata().clone())
46 },
47 })
48}
49
50fn arrow_type_to_json(data_type: &DataType) -> Result<JsonArrowDataType> {
52 match data_type {
53 DataType::Null => Ok(JsonArrowDataType::new("null".to_string())),
55 DataType::Boolean => Ok(JsonArrowDataType::new("bool".to_string())),
56 DataType::Int8 => Ok(JsonArrowDataType::new("int8".to_string())),
57 DataType::UInt8 => Ok(JsonArrowDataType::new("uint8".to_string())),
58 DataType::Int16 => Ok(JsonArrowDataType::new("int16".to_string())),
59 DataType::UInt16 => Ok(JsonArrowDataType::new("uint16".to_string())),
60 DataType::Int32 => Ok(JsonArrowDataType::new("int32".to_string())),
61 DataType::UInt32 => Ok(JsonArrowDataType::new("uint32".to_string())),
62 DataType::Int64 => Ok(JsonArrowDataType::new("int64".to_string())),
63 DataType::UInt64 => Ok(JsonArrowDataType::new("uint64".to_string())),
64 DataType::Float16 => Ok(JsonArrowDataType::new("float16".to_string())),
65 DataType::Float32 => Ok(JsonArrowDataType::new("float32".to_string())),
66 DataType::Float64 => Ok(JsonArrowDataType::new("float64".to_string())),
67 DataType::Decimal32(precision, scale) => {
68 let mut dt = JsonArrowDataType::new("decimal32".to_string());
69 dt.length = Some(*precision as i64 * 1000 + *scale as i64); Ok(dt)
71 }
72 DataType::Decimal64(precision, scale) => {
73 let mut dt = JsonArrowDataType::new("decimal64".to_string());
74 dt.length = Some(*precision as i64 * 1000 + *scale as i64); Ok(dt)
76 }
77 DataType::Decimal128(precision, scale) => {
78 let mut dt = JsonArrowDataType::new("decimal128".to_string());
79 dt.length = Some(*precision as i64 * 1000 + *scale as i64); Ok(dt)
81 }
82 DataType::Decimal256(precision, scale) => {
83 let mut dt = JsonArrowDataType::new("decimal256".to_string());
84 dt.length = Some(*precision as i64 * 1000 + *scale as i64); Ok(dt)
86 }
87 DataType::Date32 => Ok(JsonArrowDataType::new("date32".to_string())),
88 DataType::Date64 => Ok(JsonArrowDataType::new("date64".to_string())),
89 DataType::Time32(_) => Ok(JsonArrowDataType::new("time32".to_string())),
90 DataType::Time64(_) => Ok(JsonArrowDataType::new("time64".to_string())),
91 DataType::Timestamp(_, _tz) => {
92 Ok(JsonArrowDataType::new("timestamp".to_string()))
94 }
95 DataType::Duration(_) => Ok(JsonArrowDataType::new("duration".to_string())),
96 DataType::Interval(_) => Ok(JsonArrowDataType::new("interval".to_string())),
97
98 DataType::Utf8 => Ok(JsonArrowDataType::new("utf8".to_string())),
100 DataType::LargeUtf8 => Ok(JsonArrowDataType::new("large_utf8".to_string())),
101 DataType::Binary => Ok(JsonArrowDataType::new("binary".to_string())),
102 DataType::LargeBinary => Ok(JsonArrowDataType::new("large_binary".to_string())),
103 DataType::FixedSizeBinary(size) => {
104 let mut dt = JsonArrowDataType::new("fixed_size_binary".to_string());
105 dt.length = Some(*size as i64);
106 Ok(dt)
107 }
108
109 DataType::List(field) => {
111 let inner_type = arrow_type_to_json(field.data_type())?;
112 let inner_field = JsonArrowField {
113 name: field.name().clone(),
114 nullable: field.is_nullable(),
115 r#type: Box::new(inner_type),
116 metadata: if field.metadata().is_empty() {
117 None
118 } else {
119 Some(field.metadata().clone())
120 },
121 };
122 Ok(JsonArrowDataType {
123 r#type: "list".to_string(),
124 fields: Some(vec![inner_field]),
125 length: None,
126 })
127 }
128 DataType::LargeList(field) => {
129 let inner_type = arrow_type_to_json(field.data_type())?;
130 let inner_field = JsonArrowField {
131 name: field.name().clone(),
132 nullable: field.is_nullable(),
133 r#type: Box::new(inner_type),
134 metadata: if field.metadata().is_empty() {
135 None
136 } else {
137 Some(field.metadata().clone())
138 },
139 };
140 Ok(JsonArrowDataType {
141 r#type: "large_list".to_string(),
142 fields: Some(vec![inner_field]),
143 length: None,
144 })
145 }
146 DataType::FixedSizeList(field, size) => {
147 let inner_type = arrow_type_to_json(field.data_type())?;
148 let inner_field = JsonArrowField {
149 name: field.name().clone(),
150 nullable: field.is_nullable(),
151 r#type: Box::new(inner_type),
152 metadata: if field.metadata().is_empty() {
153 None
154 } else {
155 Some(field.metadata().clone())
156 },
157 };
158 Ok(JsonArrowDataType {
159 r#type: "fixed_size_list".to_string(),
160 fields: Some(vec![inner_field]),
161 length: Some(*size as i64),
162 })
163 }
164 DataType::Struct(fields) => {
165 let json_fields: Result<Vec<JsonArrowField>> = fields
166 .iter()
167 .map(|f| arrow_field_to_json(f.as_ref()))
168 .collect();
169 Ok(JsonArrowDataType {
170 r#type: "struct".to_string(),
171 fields: Some(json_fields?),
172 length: None,
173 })
174 }
175 DataType::Union(_, _) => {
176 Ok(JsonArrowDataType::new("union".to_string()))
178 }
179 DataType::Dictionary(_, value_type) => {
180 arrow_type_to_json(value_type)
182 }
183
184 DataType::Map(_, _) => Err(Error::Namespace {
186 source: "Map type is not supported by Lance".into(),
187 location: Location::new(file!(), line!(), column!()),
188 }),
189 DataType::RunEndEncoded(_, _) => Err(Error::Namespace {
190 source: format!(
191 "RunEndEncoded type is not yet supported for JSON conversion: {:?}",
192 data_type
193 )
194 .into(),
195 location: Location::new(file!(), line!(), column!()),
196 }),
197 DataType::ListView(_) | DataType::LargeListView(_) => Err(Error::Namespace {
198 source: format!(
199 "ListView types are not yet supported for JSON conversion: {:?}",
200 data_type
201 )
202 .into(),
203 location: Location::new(file!(), line!(), column!()),
204 }),
205 DataType::Utf8View | DataType::BinaryView => Err(Error::Namespace {
206 source: format!(
207 "View types are not yet supported for JSON conversion: {:?}",
208 data_type
209 )
210 .into(),
211 location: Location::new(file!(), line!(), column!()),
212 }),
213 }
214}
215
216pub fn convert_json_arrow_schema(json_schema: &JsonArrowSchema) -> Result<ArrowSchema> {
218 let fields: Result<Vec<Field>> = json_schema
219 .fields
220 .iter()
221 .map(convert_json_arrow_field)
222 .collect();
223
224 let metadata = json_schema.metadata.as_ref().cloned().unwrap_or_default();
225
226 Ok(ArrowSchema::new_with_metadata(fields?, metadata))
227}
228
229pub fn convert_json_arrow_field(json_field: &JsonArrowField) -> Result<Field> {
231 let data_type = convert_json_arrow_type(&json_field.r#type)?;
232 let nullable = json_field.nullable;
233
234 Ok(Field::new(&json_field.name, data_type, nullable))
235}
236
237pub fn convert_json_arrow_type(json_type: &JsonArrowDataType) -> Result<DataType> {
239 let type_name = json_type.r#type.to_lowercase();
240
241 match type_name.as_str() {
242 "null" => Ok(DataType::Null),
243 "bool" | "boolean" => Ok(DataType::Boolean),
244 "int8" => Ok(DataType::Int8),
245 "uint8" => Ok(DataType::UInt8),
246 "int16" => Ok(DataType::Int16),
247 "uint16" => Ok(DataType::UInt16),
248 "int32" => Ok(DataType::Int32),
249 "uint32" => Ok(DataType::UInt32),
250 "int64" => Ok(DataType::Int64),
251 "uint64" => Ok(DataType::UInt64),
252 "float32" => Ok(DataType::Float32),
253 "float64" => Ok(DataType::Float64),
254 "utf8" => Ok(DataType::Utf8),
255 "binary" => Ok(DataType::Binary),
256 _ => Err(Error::Namespace {
257 source: format!("Unsupported Arrow type: {}", type_name).into(),
258 location: Location::new(file!(), line!(), column!()),
259 }),
260 }
261}
262
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Builds a childless JSON type with the given name.
    fn json_type(name: &str) -> JsonArrowDataType {
        JsonArrowDataType::new(name.to_string())
    }

    /// Builds a metadata-free JSON field of a childless type.
    fn json_field(name: &str, type_name: &str, nullable: bool) -> JsonArrowField {
        JsonArrowField {
            name: name.to_string(),
            r#type: Box::new(json_type(type_name)),
            nullable,
            metadata: None,
        }
    }

    /// Primitive JSON type names resolve to the matching Arrow types.
    #[test]
    fn test_convert_basic_types() {
        let cases = [
            ("int32", DataType::Int32),
            ("utf8", DataType::Utf8),
            ("float64", DataType::Float64),
            ("binary", DataType::Binary),
        ];

        for (name, expected) in &cases {
            let converted = convert_json_arrow_type(&json_type(name)).unwrap();
            assert_eq!(&converted, expected);
        }
    }

    /// Name, type and nullability survive the JSON -> Arrow field conversion.
    #[test]
    fn test_convert_field() {
        let converted = convert_json_arrow_field(&json_field("test_field", "int32", false)).unwrap();

        assert_eq!(converted.name(), "test_field");
        assert_eq!(converted.data_type(), &DataType::Int32);
        assert!(!converted.is_nullable());
    }

    /// A two-field JSON schema with metadata converts to the equivalent Arrow schema.
    #[test]
    fn test_convert_schema() {
        let mut metadata = HashMap::new();
        metadata.insert("key".to_string(), "value".to_string());

        let schema = JsonArrowSchema {
            fields: vec![
                json_field("id", "int32", false),
                json_field("name", "utf8", true),
            ],
            metadata: Some(metadata.clone()),
        };

        let converted = convert_json_arrow_schema(&schema).unwrap();
        assert_eq!(converted.fields().len(), 2);
        assert_eq!(converted.field(0).name(), "id");
        assert_eq!(converted.field(1).name(), "name");
        assert_eq!(converted.metadata(), &metadata);
    }

    /// Unknown type names are rejected with a descriptive error.
    #[test]
    fn test_unsupported_type() {
        let outcome = convert_json_arrow_type(&json_type("unsupported"));

        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Unsupported Arrow type"));
    }

    /// A list serializes with its element as the single child field.
    #[test]
    fn test_list_type() {
        let element = Arc::new(Field::new("item", DataType::Int32, true));

        let json = arrow_type_to_json(&DataType::List(element)).unwrap();
        assert_eq!(json.r#type, "list");
        assert!(json.fields.is_some());

        let children = json.fields.unwrap();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].name, "item");
        assert_eq!(children[0].r#type.r#type, "int32");
    }

    /// A struct serializes each of its children in order.
    #[test]
    fn test_struct_type() {
        let members = vec![
            Field::new("id", DataType::Int64, false),
            Field::new("name", DataType::Utf8, true),
        ];

        let json = arrow_type_to_json(&DataType::Struct(members.into())).unwrap();
        assert_eq!(json.r#type, "struct");
        assert!(json.fields.is_some());

        let children = json.fields.unwrap();
        assert_eq!(children.len(), 2);
        assert_eq!(children[0].name, "id");
        assert_eq!(children[0].r#type.r#type, "int64");
        assert_eq!(children[1].name, "name");
        assert_eq!(children[1].r#type.r#type, "utf8");
    }

    /// A fixed-size list records both its element and its size.
    #[test]
    fn test_fixed_size_list_type() {
        let element = Arc::new(Field::new("item", DataType::Float32, false));

        let json = arrow_type_to_json(&DataType::FixedSizeList(element, 3)).unwrap();
        assert_eq!(json.r#type, "fixed_size_list");
        assert_eq!(json.length, Some(3));
        assert!(json.fields.is_some());

        let children = json.fields.unwrap();
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].r#type.r#type, "float32");
    }

    /// Nested types are serialized recursively.
    #[test]
    fn test_nested_struct_with_list() {
        let tags = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)));
        let members = vec![
            Field::new("id", DataType::Int32, false),
            Field::new("tags", tags, true),
        ];

        let json = arrow_type_to_json(&DataType::Struct(members.into())).unwrap();
        assert_eq!(json.r#type, "struct");

        let children = json.fields.unwrap();
        assert_eq!(children.len(), 2);
        assert_eq!(children[0].name, "id");
        assert_eq!(children[1].name, "tags");
        assert_eq!(children[1].r#type.r#type, "list");

        let list_children = children[1].r#type.fields.as_ref().unwrap();
        assert_eq!(list_children.len(), 1);
        assert_eq!(list_children[0].r#type.r#type, "utf8");
    }

    /// Map types are rejected when serializing.
    #[test]
    fn test_map_type_unsupported() {
        let entry_members = vec![
            Field::new("keys", DataType::Utf8, false),
            Field::new("values", DataType::Int32, true),
        ];
        let entries = Field::new("entries", DataType::Struct(entry_members.into()), false);
        let map_type = DataType::Map(Arc::new(entries), false);

        let outcome = arrow_type_to_json(&map_type);
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("Map type is not supported"));
    }

    /// Spot checks for a few further serialized type names.
    #[test]
    fn test_additional_types() {
        assert_eq!(arrow_type_to_json(&DataType::Date32).unwrap().r#type, "date32");
        assert_eq!(arrow_type_to_json(&DataType::Date64).unwrap().r#type, "date64");

        let fixed_binary = arrow_type_to_json(&DataType::FixedSizeBinary(16)).unwrap();
        assert_eq!(fixed_binary.r#type, "fixed_size_binary");
        assert_eq!(fixed_binary.length, Some(16));

        assert_eq!(arrow_type_to_json(&DataType::Float16).unwrap().r#type, "float16");
    }
}