use serde_json::Value as JsonJson;
use columnar::{Push, Len, Index};
use columnar::{Vecs, Strings, Lookbacks};
/// An owned, tree-shaped JSON value.
///
/// The variants correspond one-to-one with the `serde_json::Value` variants that
/// `Json::from_json` converts from. Objects are held as a list of key/value pairs
/// rather than a map; `Json::from_json` emits that list sorted by key.
#[derive(Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum Json {
/// The JSON `null` literal.
Null,
/// A JSON boolean.
Bool(bool),
/// A JSON number, wrapped in the local `Number` newtype.
Number(Number),
/// A JSON string.
String(String),
/// A JSON array of nested values.
Array(Vec<Json>),
/// A JSON object as `(key, value)` pairs.
Object(Vec<(String, Json)>),
}
impl Json {
    /// Converts a `serde_json::Value` into the local [`Json`] representation.
    ///
    /// Arrays and objects are converted recursively. The members of an object are
    /// collected into a `Vec` of `(key, value)` pairs and then sorted by key.
    pub fn from_json(json: JsonJson) -> Self {
        match json {
            JsonJson::Null => Json::Null,
            JsonJson::Bool(flag) => Json::Bool(flag),
            JsonJson::Number(number) => Json::Number(Number(number)),
            JsonJson::String(text) => Json::String(text),
            JsonJson::Array(items) => {
                let mut converted = Vec::with_capacity(items.len());
                for item in items {
                    converted.push(Json::from_json(item));
                }
                Json::Array(converted)
            }
            JsonJson::Object(members) => {
                let mut entries = Vec::with_capacity(members.len());
                for (key, value) in members {
                    entries.push((key, Json::from_json(value)));
                }
                // Order the pairs by key.
                entries.sort_by(|lhs, rhs| lhs.0.cmp(&rhs.0));
                Json::Object(entries)
            }
        }
    }
}
/// A small, copyable handle naming one JSON value inside a `Jsons` store.
///
/// `Null` and `Bool` carry their value inline. Every other variant holds a position
/// in the matching column of `Jsons` (`numbers`, `strings`, `arrays`, `objects`),
/// which `Jsons::dereference` resolves.
#[derive(Copy, Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum JsonIdx {
/// The JSON `null` literal; no storage needed.
Null,
/// A JSON boolean, stored inline.
Bool(bool),
/// Position in `Jsons::numbers`.
Number(usize),
/// Position in `Jsons::strings`.
String(usize),
/// Position in `Jsons::arrays`.
Array(usize),
/// Position in `Jsons::objects`.
Object(usize),
}
/// Newtype wrapper around `serde_json::Number`, used as the number payload of `Json`
/// and as the element type of `Jsons::numbers`.
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Number (serde_json::Number);
impl std::ops::Deref for Number {
type Target = serde_json::Number;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// A columnar store for a sequence of JSON values.
///
/// Each pushed value contributes one entry to `roots`; the payloads of numbers,
/// strings, arrays and objects live in their own columns and are referred to by
/// the positions held in [`JsonIdx`].
#[derive(Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Jsons {
    /// One handle per stored top-level JSON value, in insertion order.
    pub roots: Vec<JsonIdx>,
    /// Number payloads, addressed by `JsonIdx::Number`.
    pub numbers: Vec<Number>,
    /// String payloads as bytes, addressed by `JsonIdx::String`.
    pub strings: Lookbacks<Strings>,
    /// Element handles of each array, addressed by `JsonIdx::Array`.
    pub arrays: Vecs<Vec<JsonIdx>>,
    /// `(key bytes, value handle)` pairs of each object, addressed by `JsonIdx::Object`.
    pub objects: Vecs<(Lookbacks<Strings>, Vec<JsonIdx>)>,
}
/// A borrowed view of a single JSON value held in a `Jsons` store, as produced by
/// `Jsons::dereference`.
#[derive(Debug)]
pub enum JsonsRef<'a> {
/// The JSON `null` literal.
Null,
/// A JSON boolean.
Bool(bool),
/// A borrowed number from the store's `numbers` column.
Number(&'a Number),
/// The bytes of a string from the store's `strings` column.
String(&'a [u8]),
/// A handle to an array; its elements are looked up lazily through the store.
Array(ArrRef<'a>),
/// A handle to an object; its members are looked up lazily through the store.
Object(ObjRef<'a>),
}
/// A reference to one array inside a `Jsons` store.
#[derive(Debug)]
pub struct ArrRef<'a> {
/// Position of the array within `store.arrays`.
pub index: usize,
/// The store that owns the array's elements.
pub store: &'a Jsons,
}
/// A reference to one object inside a `Jsons` store.
#[derive(Debug)]
pub struct ObjRef<'a> {
/// Position of the object within `store.objects`.
pub index: usize,
/// The store that owns the object's keys and values.
pub store: &'a Jsons,
}
/// Structural comparison of a columnar view against an owned [`Json`] tree.
impl<'a> PartialEq<Json> for JsonsRef<'a> {
    /// Returns `true` when both sides are the same kind of value with equal contents.
    ///
    /// Arrays are compared element by element in order. Objects are compared pair by
    /// pair in order, so both the key bytes and the value must match at each position.
    #[inline(always)]
    fn eq(&self, other: &Json) -> bool {
        match (self, other) {
            (JsonsRef::Null, Json::Null) => true,
            (JsonsRef::Bool(lhs), Json::Bool(rhs)) => lhs == rhs,
            (JsonsRef::Number(lhs), Json::Number(rhs)) => *lhs == rhs,
            (JsonsRef::String(lhs), Json::String(rhs)) => *lhs == rhs.as_bytes(),
            (JsonsRef::Array(arr), Json::Array(expected)) => {
                let elems: columnar::Slice<&Vec<JsonIdx>> = (&arr.store.arrays).get(arr.index);
                if elems.len() != expected.len() {
                    return false;
                }
                elems
                    .into_iter()
                    .zip(expected)
                    .all(|(idx, want)| arr.store.dereference(*idx).eq(want))
            }
            (JsonsRef::Object(obj), Json::Object(expected)) => {
                let members: columnar::Slice<&(_, _)> = (&obj.store.objects).get(obj.index);
                if members.len() != expected.len() {
                    return false;
                }
                members
                    .into_iter()
                    .zip(expected)
                    .all(|((key, idx), (want_key, want_val))| {
                        key == want_key.as_bytes() && obj.store.dereference(*idx).eq(want_val)
                    })
            }
            // Different kinds of value never compare equal.
            _ => false,
        }
    }
}
/// Appends an owned [`Json`] value to the store.
impl Push<Json> for Jsons {
    /// Stores `json` as a new root; the value is only read, then dropped.
    fn push(&mut self, json: Json) {
        // The by-reference implementation performs the same copy / record-root /
        // drain sequence, so hand the borrowed value to it.
        <Self as Push<&Json>>::push(self, &json);
    }
}
/// Appends borrowed [`Json`] values to the store.
impl<'a> Push<&'a Json> for Jsons {
    /// Stores `json` as a new root and flushes all of its nested arrays and objects.
    fn push(&mut self, json: &'a Json) {
        let mut queues = JsonQueues::new_from(self);
        let root = queues.copy(json);
        queues.store.roots.push(root);
        queues.finish();
    }

    /// Stores every value of `jsons` as a new root, sharing one worker across them.
    ///
    /// The queues are drained after each value, before the next one is copied.
    fn extend(&mut self, jsons: impl IntoIterator<Item=&'a Json>) {
        let mut queues = JsonQueues::new_from(self);
        jsons.into_iter().for_each(|json| {
            let root = queues.copy(json);
            queues.store.roots.push(root);
            queues.finish();
        });
    }
}
impl Len for Jsons {
fn len(&self) -> usize {
self.roots.len()
}
}
/// Positional access to the top-level values of a [`Jsons`].
impl<'a> Index for &'a Jsons {
    type Ref = JsonsRef<'a>;

    /// Returns a view of the `index`-th root value.
    ///
    /// Panics if `index` is out of bounds for `roots`.
    #[inline(always)]
    fn get(&self, index: usize) -> Self::Ref {
        let store: &'a Jsons = *self;
        let root = store.roots[index];
        store.dereference(root)
    }
}
impl Jsons {
    /// Resolves a [`JsonIdx`] handle into a borrowed view of the value it names.
    ///
    /// Inline values (`Null`, `Bool`) are returned directly; numbers and strings are
    /// fetched from their columns; arrays and objects become lightweight handles that
    /// pair the position with this store.
    #[inline(always)]
    pub fn dereference(&self, index: JsonIdx) -> JsonsRef<'_> {
        let store = self;
        match index {
            JsonIdx::Null => JsonsRef::Null,
            JsonIdx::Bool(flag) => JsonsRef::Bool(flag),
            JsonIdx::Number(at) => {
                let number = (&self.numbers).get(at);
                JsonsRef::Number(number)
            }
            JsonIdx::String(at) => {
                let bytes = (&self.strings).get(at);
                JsonsRef::String(bytes)
            }
            JsonIdx::Array(index) => JsonsRef::Array(ArrRef { index, store }),
            JsonIdx::Object(index) => JsonsRef::Object(ObjRef { index, store }),
        }
    }
}
/// Worker state for copying `Json` trees into a `Jsons` store.
///
/// Nested arrays and objects are not written immediately; they wait in these queues
/// until `finish` drains them.
struct JsonQueues<'a> {
/// Arrays that have been assigned an index but whose elements are not yet stored.
arr_todo: std::collections::VecDeque<&'a [Json]>,
/// Objects that have been assigned an index but whose members are not yet stored.
obj_todo: std::collections::VecDeque<&'a [(String, Json)]>,
/// The store being written into.
store: &'a mut Jsons,
}
impl<'a> JsonQueues<'a> {
/// Creates a worker with empty queues that writes into `store`.
fn new_from(store: &'a mut Jsons) -> Self {
Self {
arr_todo: Default::default(),
obj_todo: Default::default(),
store,
}
}
/// Copies `json` into the store far enough to name it, and returns its handle.
///
/// Numbers and strings are appended to their columns right away. Arrays and objects
/// are only enqueued: the handle returned for them is the position they will occupy
/// once everything ahead of them in the queue has been written, i.e. the column's
/// current length plus their position in the queue.
fn copy(&mut self, json: &'a Json) -> JsonIdx {
match json {
Json::Null => JsonIdx::Null,
Json::Bool(b) => JsonIdx::Bool(*b),
Json::Number(n) => {
self.store.numbers.push(n.clone());
JsonIdx::Number(self.store.numbers.len() - 1)
},
Json::String(s) => {
self.store.strings.push(s.as_bytes());
JsonIdx::String(self.store.strings.len() - 1)
},
Json::Array(a) => {
self.arr_todo.push_back(a);
// Future position: entries already stored plus entries queued (this one last).
JsonIdx::Array(self.store.arrays.len() + self.arr_todo.len() - 1)
},
Json::Object(o) => {
self.obj_todo.push_back(o);
// Future position: entries already stored plus entries queued (this one last).
JsonIdx::Object(self.store.objects.len() + self.obj_todo.len() - 1)
},
}
}
/// Drains both queues so that every handle previously returned by `copy` refers to
/// stored data.
///
/// Processing an array can enqueue objects and vice versa, so the outer loop repeats
/// until both queues are empty.
fn finish(&mut self) {
// Scratch buffer for child handles, reused across iterations.
let mut temp = Vec::default();
while !self.arr_todo.is_empty() || !self.obj_todo.is_empty() {
// The front array is peeked rather than popped while its elements are copied:
// it must still count towards the queue length so that `copy` assigns correct
// positions to any nested arrays. Only afterwards is it popped and written.
while let Some(json) = self.arr_todo.front().cloned() {
Extend::extend(&mut temp, json.iter().map(|v| self.copy(v)));
self.arr_todo.pop_front();
self.store.arrays.push_iter(temp.drain(..));
}
// Same peek-then-pop ordering as above, for nested objects.
while let Some(pairs) = self.obj_todo.front().cloned() {
Extend::extend(&mut temp, pairs.iter().map(|(_,v)| self.copy(v)));
self.obj_todo.pop_front();
// Re-pair each value handle with its key, stored as bytes.
self.store.objects.push_iter(temp.drain(..).zip(pairs).map(|(v,(s,_))| (s.as_bytes(), v)));
}
}
}
}
/// Benchmark driver for the columnar JSON store.
///
/// Reads a JSON array of records from `true.txt`, converts them to [`Json`], loads
/// them into a [`Jsons`] store, checks that every stored record compares equal to
/// its source, and reports the time taken by each phase together with a bincode
/// encode/decode round trip.
///
/// # Panics
///
/// Panics if `true.txt` cannot be opened or parsed, if bincode encoding or decoding
/// fails, or if the decoded store differs from the original.
fn main() {
    use columnar::{Push, Len, Index};
    use std::fs::File;
    use std::io::BufReader;
    use serde_json::Value as JsonValue;

    let timer = std::time::Instant::now();
    let f = File::open("true.txt").expect("failed to open true.txt");
    // serde_json does not buffer its reader, so wrap the file to avoid many small reads.
    let records: Vec<JsonValue> = serde_json::from_reader(BufReader::new(f))
        .expect("failed to read true.txt as a JSON array");
    let time = timer.elapsed();
    println!("{:?}\tread {} json records", time, records.len());

    // Baseline: cost of cloning (and dropping) the serde_json values.
    let timer = std::time::Instant::now();
    let _ = records.clone();
    let time = timer.elapsed();
    println!("{:?}\tjson_vals cloned", time);

    // `records` is not needed afterwards, so consume it instead of cloning it again.
    let values = records.into_iter().map(Json::from_json).collect::<Vec<_>>();

    let timer = std::time::Instant::now();
    let mut json_cols = Jsons::default();
    json_cols.extend(values.iter());
    let time = timer.elapsed();
    println!("{:?}\tjson_cols formed", time);
    println!("\t\tjson_cols.arrays.len: {:?}", json_cols.arrays.len());

    // Verify that each stored record matches the value it was built from.
    let timer = std::time::Instant::now();
    for (index, value) in values.iter().enumerate() {
        if (&json_cols).get(index) != *value {
            println!("Mismatch: {:?}: {:?}", index, value);
        }
    }
    let time = timer.elapsed();
    println!("{:?}\tcompared", time);

    let timer = std::time::Instant::now();
    let _ = json_cols.clone();
    let time = timer.elapsed();
    println!("{:?}\tjson_cols cloned", time);

    let timer = std::time::Instant::now();
    let encoded: Vec<u8> = bincode::serialize(&json_cols).expect("bincode failed to encode json_cols");
    let time = timer.elapsed();
    println!("{:?}\tjson_cols encode ({} bytes; bincode)", time, encoded.len());

    let timer = std::time::Instant::now();
    let decoded: Jsons = bincode::deserialize(&encoded[..]).expect("bincode failed to decode json_cols");
    let time = timer.elapsed();
    println!("{:?}\tjson_cols decode (bincode)", time);

    // The round trip must reproduce the store exactly.
    assert_eq!(json_cols, decoded);
}