use serde::Deserializer;
use serde::de::{DeserializeSeed, MapAccess, SeqAccess, Visitor};
use std::cell::RefCell;
use crate::order::NodeKind;
use crate::utils::tree_arena::{JsonTreeArena, JsonTreeNode};
use crate::ingest::sampling::ArraySamplerKind;
#[derive(Default)]
pub(crate) struct JsonTreeBuilder {
arena: RefCell<JsonTreeArena>,
pub(crate) array_cap: usize,
sampler: ArraySamplerKind,
}
impl JsonTreeBuilder {
pub(crate) fn new(array_cap: usize, sampler: ArraySamplerKind) -> Self {
Self {
arena: RefCell::new(JsonTreeArena::default()),
array_cap,
sampler,
}
}
pub(crate) fn seed(&self) -> NodeSeed<'_> {
NodeSeed { b: self }
}
pub(crate) fn finish(self) -> JsonTreeArena {
self.arena.into_inner()
}
#[cfg(test)]
pub(crate) fn push_object_root(
&self,
keys: Vec<String>,
children: Vec<usize>,
) -> usize {
let id = self.push_default();
let count = keys.len().min(children.len());
self.finish_object(id, count, children, keys);
id
}
pub(crate) fn push_default(&self) -> usize {
let mut a = self.arena.borrow_mut();
let id = a.nodes.len();
a.nodes.push(JsonTreeNode::default());
id
}
fn push_with(&self, set: impl FnOnce(&mut JsonTreeNode)) -> usize {
let id = self.push_default();
let mut a = self.arena.borrow_mut();
let n = &mut a.nodes[id];
set(n);
id
}
fn push_number<N>(&self, v: N) -> usize
where
serde_json::Number: From<N>,
{
self.push_with(|n| {
n.kind = NodeKind::Number;
let num = serde_json::Number::from(v);
n.atomic_token = Some(num.to_string());
})
}
fn push_bool(&self, v: bool) -> usize {
self.push_with(|n| {
n.kind = NodeKind::Bool;
n.atomic_token =
Some(if v { "true" } else { "false" }.to_string());
})
}
fn push_string_owned(&self, s: String) -> usize {
self.push_with(|n| {
n.kind = NodeKind::String;
n.string_value = Some(s);
})
}
fn push_null(&self) -> usize {
self.push_with(|n| {
n.kind = NodeKind::Null;
n.atomic_token = Some("null".to_string());
})
}
pub(crate) fn finish_array(
&self,
id: usize,
kept: usize,
total: usize,
local_children: Vec<usize>,
local_indices: Vec<usize>,
) {
let mut a = self.arena.borrow_mut();
let children_start = a.children.len();
a.children.extend(local_children);
let contiguous = local_indices.len() == kept
&& local_indices.iter().enumerate().all(|(i, &idx)| idx == i);
let (arr_indices_start, pushed_len) =
if kept == 0 || contiguous || local_indices.is_empty() {
(0usize, 0usize)
} else {
let start = a.arr_indices.len();
a.arr_indices.extend(local_indices);
let pushed = a.arr_indices.len().saturating_sub(start);
(start, pushed)
};
let n = &mut a.nodes[id];
n.kind = NodeKind::Array;
n.children_start = children_start;
n.children_len = kept;
n.array_len = Some(total);
n.arr_indices_start = arr_indices_start;
n.arr_indices_len = pushed_len.min(kept);
}
fn finish_object(
&self,
id: usize,
count: usize,
local_children: Vec<usize>,
local_keys: Vec<String>,
) {
let mut a = self.arena.borrow_mut();
let children_start = a.children.len();
let obj_keys_start = a.obj_keys.len();
a.children.extend(local_children);
a.obj_keys.extend(local_keys);
let n = &mut a.nodes[id];
n.kind = NodeKind::Object;
n.children_start = children_start;
n.children_len = count;
n.obj_keys_start = obj_keys_start;
n.obj_keys_len = count;
n.object_len = Some(count);
}
}
pub(crate) struct NodeSeed<'a> {
pub(crate) b: &'a JsonTreeBuilder,
}
impl<'de> DeserializeSeed<'de> for NodeSeed<'_> {
type Value = usize;
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_any(NodeVisitor { b: self.b })
}
}
struct NodeVisitor<'b> {
b: &'b JsonTreeBuilder,
}
impl<'de> Visitor<'de> for NodeVisitor<'_> {
type Value = usize;
fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "any JSON value")
}
fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.b.push_bool(v))
}
fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.b.push_number(v))
}
fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.b.push_number(v))
}
fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
let num = serde_json::Number::from_f64(v)
.ok_or_else(|| E::custom("invalid f64"))?;
let id = self.b.push_with(|n| {
n.kind = NodeKind::Number;
n.atomic_token = Some(num.to_string());
});
Ok(id)
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.b.push_string_owned(v.to_owned()))
}
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.b.push_string_owned(v))
}
fn visit_unit<E>(self) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(self.b.push_null())
}
fn visit_none<E>(self) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
self.visit_unit()
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
let id = self.b.push_default();
let sampled = self.b.sampler.sample_stream(
&mut seq,
self.b,
self.b.array_cap,
)?;
let kept = sampled.children.len();
self.b.finish_array(
id,
kept,
sampled.total_len,
sampled.children,
sampled.indices,
);
Ok(id)
}
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
where
A: MapAccess<'de>,
{
let id = self.b.push_default();
let mut local_children: Vec<usize> = Vec::new();
let mut local_keys: Vec<String> = Vec::new();
let low = map.size_hint().unwrap_or(0);
local_children.reserve(low);
local_keys.reserve(low);
let mut count = 0usize;
while let Some(key) = map.next_key::<String>()? {
let cid: usize = {
let seed = self.b.seed();
map.next_value_seed(seed)?
};
local_children.push(cid);
local_keys.push(key);
count += 1;
}
self.b.finish_object(id, count, local_children, local_keys);
Ok(id)
}
}
impl JsonTreeBuilder {}