#![doc(
html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg",
html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg"
)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![deny(rustdoc::broken_intra_doc_links)]
#![warn(missing_docs)]
pub mod reader;
pub mod writer;
pub use self::reader::{Reader, ReaderBuilder};
pub use self::writer::{
ArrayWriter, Encoder, EncoderFactory, EncoderOptions, LineDelimitedWriter, Writer,
WriterBuilder,
};
use half::f16;
use serde_json::{Number, Value};
/// Selects the JSON shape used for struct values.
///
/// The tests in this file pass this value to `with_struct_mode` on both
/// `ReaderBuilder` and `WriterBuilder` so that reading and writing agree on
/// the layout.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
pub enum StructMode {
    /// Structs are JSON objects keyed by field name, for example
    /// `{"c1":{"c11":1},"c2":"a"}`. This is the default mode.
    #[default]
    ObjectOnly,
    /// Structs are JSON lists whose elements appear in schema field order,
    /// for example `[[1],"a"]`. In the round-trip test in this file the
    /// top-level rows are written as lists as well.
    ListOnly,
}
/// Conversion of an owned value into a `serde_json::Value`.
///
/// This file implements the trait for `bool`, the 8- to 64-bit signed and
/// unsigned integers, `i128`, `f16`, `f32` and `f64`.
pub trait JsonSerializable: 'static {
    /// Consumes `self` and returns its JSON representation.
    ///
    /// Returns `None` when the value has no JSON representation; the float
    /// implementations in this file do so whenever `Number::from_f64`
    /// rejects the value (the tests check this for `f32::NAN`).
    fn into_json_value(self) -> Option<Value>;
}
macro_rules! json_serializable {
($t:ty) => {
impl JsonSerializable for $t {
fn into_json_value(self) -> Option<Value> {
Some(self.into())
}
}
};
}
json_serializable!(bool);
json_serializable!(u8);
json_serializable!(u16);
json_serializable!(u32);
json_serializable!(u64);
json_serializable!(i8);
json_serializable!(i16);
json_serializable!(i32);
json_serializable!(i64);
impl JsonSerializable for i128 {
fn into_json_value(self) -> Option<Value> {
Some(self.to_string().into())
}
}
impl JsonSerializable for f16 {
    /// Widens to `f64`, rounds to three decimal places and converts the
    /// result to a JSON number. Yields `None` when `Number::from_f64`
    /// rejects the rounded value.
    fn into_json_value(self) -> Option<Value> {
        let widened = f64::from(self);
        let rounded = (widened * 1000.0).round() / 1000.0;
        Number::from_f64(rounded).map(Value::Number)
    }
}
impl JsonSerializable for f32 {
    /// Widens to `f64`, rounds to three decimal places and converts the
    /// result to a JSON number. Yields `None` when `Number::from_f64`
    /// rejects the rounded value (the tests check this for NaN).
    fn into_json_value(self) -> Option<Value> {
        // `f32` -> `f64` is lossless, so `From` is equivalent to an `as` cast.
        let widened = f64::from(self);
        let rounded = (widened * 1000.0).round() / 1000.0;
        Number::from_f64(rounded).map(Value::Number)
    }
}
impl JsonSerializable for f64 {
    /// Converts the value to a JSON number without rounding. Yields `None`
    /// when `Number::from_f64` rejects the value.
    fn into_json_value(self) -> Option<Value> {
        let number = Number::from_f64(self)?;
        Some(Value::Number(number))
    }
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use crate::writer::JsonArray;
use super::*;
use arrow_array::{
ArrayRef, GenericBinaryArray, GenericByteViewArray, RecordBatch, RecordBatchWriter,
builder::FixedSizeBinaryBuilder, types::BinaryViewType,
};
use serde_json::Value::{Bool, Number as VNumber, String as VString};
/// Checks `JsonSerializable::into_json_value` for every native type that
/// implements the trait in this file.
#[test]
fn test_arrow_native_type_to_json() {
    assert_eq!(Some(Bool(true)), true.into_json_value());

    // Signed and unsigned integers up to 64 bits become JSON numbers.
    assert_eq!(Some(VNumber(Number::from(1))), 1i8.into_json_value());
    assert_eq!(Some(VNumber(Number::from(1))), 1i16.into_json_value());
    assert_eq!(Some(VNumber(Number::from(1))), 1i32.into_json_value());
    assert_eq!(Some(VNumber(Number::from(1))), 1i64.into_json_value());
    assert_eq!(Some(VNumber(Number::from(1))), 1u8.into_json_value());
    assert_eq!(Some(VNumber(Number::from(1))), 1u16.into_json_value());
    assert_eq!(Some(VNumber(Number::from(1))), 1u32.into_json_value());
    assert_eq!(Some(VNumber(Number::from(1))), 1u64.into_json_value());

    // 128-bit integers are emitted as decimal strings.
    assert_eq!(Some(VString("1".to_string())), 1i128.into_json_value());

    // An untyped float literal falls back to `f64`, so this and the next
    // assertion both exercise the `f64` implementation.
    assert_eq!(
        Some(VNumber(Number::from_f64(0.01f64).unwrap())),
        0.01.into_json_value()
    );
    assert_eq!(
        Some(VNumber(Number::from_f64(0.01f64).unwrap())),
        0.01f64.into_json_value()
    );

    // Finite `f32`: the value is widened to `f64` and rounded to three
    // decimals, which removes the widening noise (0.00999999977... -> 0.01).
    assert_eq!(
        Some(VNumber(Number::from_f64(0.01f64).unwrap())),
        0.01f32.into_json_value()
    );

    // Non-finite floats have no JSON number representation.
    assert_eq!(None, f32::NAN.into_json_value());
    assert_eq!(None, f32::INFINITY.into_json_value());
    assert_eq!(None, f64::NAN.into_json_value());
    assert_eq!(None, f64::INFINITY.into_json_value());
}
/// Reads line-delimited JSON containing nested structs and writes it back,
/// once per `StructMode`, expecting the output bytes to equal the input.
#[test]
fn test_json_roundtrip_structs() {
    use crate::writer::LineDelimited;
    use arrow_schema::{DataType, Field, Fields, Schema};
    use std::sync::Arc;

    // c1: struct { c11: nullable int32, c12: struct { c121: utf8 } }, c2: utf8
    let c12 = Field::new(
        "c12",
        DataType::Struct(Fields::from(vec![Field::new("c121", DataType::Utf8, false)])),
        false,
    );
    let c1 = Field::new(
        "c1",
        DataType::Struct(Fields::from(vec![
            Field::new("c11", DataType::Int32, true),
            c12,
        ])),
        false,
    );
    let c2 = Field::new("c2", DataType::Utf8, false);
    let schema = Arc::new(Schema::new(vec![c1, c2]));

    // The same three rows in each layout; the second row has a null `c11`,
    // which is absent in object form and an explicit `null` in list form.
    let cases = [
        (
            StructMode::ObjectOnly,
            r#"{"c1":{"c11":1,"c12":{"c121":"e"}},"c2":"a"}
{"c1":{"c12":{"c121":"f"}},"c2":"b"}
{"c1":{"c11":5,"c12":{"c121":"g"}},"c2":"c"}
"#,
        ),
        (
            StructMode::ListOnly,
            r#"[[1,["e"]],"a"]
[[null,["f"]],"b"]
[[5,["g"]],"c"]
"#,
        ),
    ];

    for (mode, text) in cases {
        let input = text.as_bytes();
        let reader = ReaderBuilder::new(schema.clone())
            .with_struct_mode(mode)
            .build(input)
            .unwrap();

        let mut output: Vec<u8> = Vec::new();
        let mut writer = WriterBuilder::new()
            .with_struct_mode(mode)
            .build::<_, LineDelimited>(&mut output);
        for batch in reader {
            writer.write(&batch.unwrap()).unwrap();
        }

        assert_eq!(input, &output);
    }
}
/// Round-trips the same binary values, including a null and a value that is
/// not valid UTF-8, through JSON for each binary array flavour.
#[test]
#[allow(invalid_from_utf8)]
fn test_json_roundtrip_binary() {
    let invalid_utf8: &[u8] = b"Not UTF8 \xa0\xa1!";
    assert!(str::from_utf8(invalid_utf8).is_err());

    // Every non-null value is 12 bytes long, matching the fixed width used
    // for the fixed-size binary array below.
    let values: &[Option<&[u8]>] = &[
        Some(b"Ned Flanders" as &[u8]),
        None,
        Some(b"Troy McClure" as &[u8]),
        Some(invalid_utf8),
    ];

    let arrays: [ArrayRef; 4] = [
        Arc::new(GenericBinaryArray::<i32>::from_iter(values)),
        Arc::new(GenericBinaryArray::<i64>::from_iter(values)),
        build_array_fixed_size_binary(12, values),
        Arc::new(GenericByteViewArray::<BinaryViewType>::from_iter(values)),
    ];
    for array in arrays {
        assert_binary_json(array);
    }
}
/// Builds a fixed-size binary array of width `byte_width` from `values`,
/// appending a null for each `None`. Panics if `append_value` rejects a value.
fn build_array_fixed_size_binary(byte_width: i32, values: &[Option<&[u8]>]) -> ArrayRef {
    let mut builder = FixedSizeBinaryBuilder::new(byte_width);
    values.iter().for_each(|entry| {
        if let Some(bytes) = entry {
            builder.append_value(bytes).unwrap();
        } else {
            builder.append_null();
        }
    });
    Arc::new(builder.finish())
}
fn assert_binary_json(array: ArrayRef) {
assert_binary_json_with_writer(
array.clone(),
WriterBuilder::new().with_explicit_nulls(true),
);
assert_binary_json_with_writer(array, WriterBuilder::new().with_explicit_nulls(false));
}
/// Writes `array` as a single-column batch named `bytes` using a JSON-array
/// writer built from `builder`, decodes the produced JSON with the batch's own
/// schema, and asserts the decoded batch equals the original.
fn assert_binary_json_with_writer(array: ArrayRef, builder: WriterBuilder) {
    let batch = RecordBatch::try_from_iter([("bytes", array)]).unwrap();

    // Encode: batch -> JSON bytes.
    let mut encoded = Vec::new();
    let mut writer = builder.build::<_, JsonArray>(&mut encoded);
    writer.write(&batch).unwrap();
    writer.close().unwrap();

    // Parse the bytes back into generic JSON rows.
    let parsed: Value = serde_json::from_slice(&encoded).unwrap();
    let rows = parsed.as_array().unwrap();

    // Decode: JSON rows -> batch, using the original schema.
    let mut decoder = ReaderBuilder::new(batch.schema().clone())
        .build_decoder()
        .unwrap();
    decoder.serialize(rows).unwrap();
    let decoded = decoder.flush().unwrap().unwrap();

    assert_eq!(batch, decoded);
}
}