jsonb/core/databend/
builder.rs

1// Copyright 2023 Datafuse Labs.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use core::ops::Range;
16use std::collections::BTreeMap;
17
18use byteorder::BigEndian;
19use byteorder::WriteBytesExt;
20
21use super::constants::*;
22use super::jentry::JEntry;
23use crate::core::JsonbItem;
24use crate::error::Error;
25use crate::error::Result;
26use crate::OwnedJsonb;
27use crate::RawJsonb;
28
29pub(crate) struct ArrayBuilder<'a> {
30    items: Vec<JsonbItem<'a>>,
31}
32
33impl<'a> ArrayBuilder<'a> {
34    pub(crate) fn new() -> Self {
35        Self { items: Vec::new() }
36    }
37
38    pub(crate) fn with_capacity(capacity: usize) -> Self {
39        Self {
40            items: Vec::with_capacity(capacity),
41        }
42    }
43
44    pub(crate) fn push_jsonb_item(&mut self, item: JsonbItem<'a>) {
45        self.items.push(item);
46    }
47
48    pub(crate) fn push_raw_jsonb(&mut self, raw: RawJsonb<'a>) {
49        let item = JsonbItem::Raw(raw);
50        self.items.push(item);
51    }
52
53    pub(crate) fn push_owned_jsonb(&mut self, owned: OwnedJsonb) {
54        let item = JsonbItem::Owned(owned);
55        self.push_jsonb_item(item)
56    }
57
58    pub(crate) fn build(self) -> Result<OwnedJsonb> {
59        let mut buf = Vec::new();
60        let header = ARRAY_CONTAINER_TAG | self.items.len() as u32;
61        buf.write_u32::<BigEndian>(header)?;
62
63        let mut jentry_index = reserve_jentries(&mut buf, self.items.len() * 4);
64        for item in self.items.into_iter() {
65            append_jsonb_item(&mut buf, &mut jentry_index, item)?;
66        }
67        Ok(OwnedJsonb::new(buf))
68    }
69}
70
71pub(crate) struct ArrayDistinctBuilder<'a> {
72    items: Vec<JsonbItem<'a>>,
73    item_map: BTreeMap<JsonbItem<'a>, usize>,
74}
75
76impl<'a> ArrayDistinctBuilder<'a> {
77    pub(crate) fn new(capacity: usize) -> Self {
78        Self {
79            items: Vec::with_capacity(capacity),
80            item_map: BTreeMap::new(),
81        }
82    }
83
84    pub(crate) fn push_jsonb_item(&mut self, item: JsonbItem<'a>) {
85        if let Some(cnt) = self.item_map.get_mut(&item) {
86            *cnt += 1;
87        } else {
88            self.item_map.insert(item.clone(), 1);
89            self.items.push(item);
90        }
91    }
92
93    pub(crate) fn push_raw_jsonb(&mut self, raw: RawJsonb<'a>) {
94        let item = JsonbItem::Raw(raw);
95        self.push_jsonb_item(item);
96    }
97
98    pub(crate) fn pop_jsonb_item(&mut self, item: JsonbItem<'a>) -> Option<()> {
99        if let Some(cnt) = self.item_map.get_mut(&item) {
100            if *cnt > 0 {
101                *cnt -= 1;
102                return Some(());
103            }
104        }
105        None
106    }
107
108    pub(crate) fn pop_raw_jsonb(&mut self, raw: RawJsonb<'a>) -> Option<()> {
109        let item = JsonbItem::Raw(raw);
110        self.pop_jsonb_item(item)
111    }
112
113    pub(crate) fn build(self) -> Result<OwnedJsonb> {
114        let mut buf = Vec::new();
115        let header = ARRAY_CONTAINER_TAG | self.items.len() as u32;
116        buf.write_u32::<BigEndian>(header)?;
117
118        let mut jentry_index = reserve_jentries(&mut buf, self.items.len() * 4);
119        for item in self.items.into_iter() {
120            append_jsonb_item(&mut buf, &mut jentry_index, item)?;
121        }
122        Ok(OwnedJsonb::new(buf))
123    }
124}
125
126pub(crate) struct ObjectBuilder<'a> {
127    entries: BTreeMap<&'a str, JsonbItem<'a>>,
128}
129
130impl<'a> ObjectBuilder<'a> {
131    pub(crate) fn new() -> Self {
132        Self {
133            entries: BTreeMap::new(),
134        }
135    }
136
137    pub(crate) fn push_jsonb_item(&mut self, key: &'a str, val_item: JsonbItem<'a>) -> Result<()> {
138        if self.entries.contains_key(key) {
139            return Err(Error::ObjectDuplicateKey);
140        }
141        self.entries.insert(key, val_item);
142        Ok(())
143    }
144
145    pub(crate) fn push_raw_jsonb(&mut self, key: &'a str, raw: RawJsonb<'a>) -> Result<()> {
146        let item = JsonbItem::Raw(raw);
147        self.push_jsonb_item(key, item)
148    }
149
150    pub(crate) fn push_owned_jsonb(&mut self, key: &'a str, owned: OwnedJsonb) -> Result<()> {
151        let item = JsonbItem::Owned(owned);
152        self.push_jsonb_item(key, item)
153    }
154
155    pub(crate) fn contains_key(&self, key: &'a str) -> bool {
156        self.entries.contains_key(key)
157    }
158
159    pub(crate) fn build(self) -> Result<OwnedJsonb> {
160        let mut buf = Vec::new();
161        let header = OBJECT_CONTAINER_TAG | self.entries.len() as u32;
162        buf.write_u32::<BigEndian>(header)?;
163
164        let mut jentry_index = reserve_jentries(&mut buf, self.entries.len() * 8);
165        for (key, _) in self.entries.iter() {
166            let key_len = key.len();
167            buf.extend_from_slice(key.as_bytes());
168            let jentry = JEntry::make_string_jentry(key_len);
169            replace_jentry(&mut buf, jentry, &mut jentry_index)
170        }
171        for (_, item) in self.entries.into_iter() {
172            append_jsonb_item(&mut buf, &mut jentry_index, item)?;
173        }
174        Ok(OwnedJsonb::new(buf))
175    }
176}
177
178fn append_jsonb_item(buf: &mut Vec<u8>, jentry_index: &mut usize, item: JsonbItem) -> Result<()> {
179    match item {
180        JsonbItem::Null => {
181            let jentry = JEntry::make_null_jentry();
182            replace_jentry(buf, jentry, jentry_index);
183        }
184        JsonbItem::Boolean(v) => {
185            let jentry = if v {
186                JEntry::make_true_jentry()
187            } else {
188                JEntry::make_false_jentry()
189            };
190            replace_jentry(buf, jentry, jentry_index);
191        }
192        JsonbItem::Number(data) => {
193            let jentry = JEntry::make_number_jentry(data.len());
194            replace_jentry(buf, jentry, jentry_index);
195            buf.extend_from_slice(data);
196        }
197        JsonbItem::String(data) => {
198            let jentry = JEntry::make_string_jentry(data.len());
199            replace_jentry(buf, jentry, jentry_index);
200            buf.extend_from_slice(data);
201        }
202        JsonbItem::Raw(raw_jsonb) => {
203            append_raw_jsonb_data(buf, jentry_index, raw_jsonb)?;
204        }
205        JsonbItem::Owned(owned_jsonb) => {
206            let raw_jsonb = owned_jsonb.as_raw();
207            append_raw_jsonb_data(buf, jentry_index, raw_jsonb)?;
208        }
209    }
210    Ok(())
211}
212
213fn append_raw_jsonb_data(
214    buf: &mut Vec<u8>,
215    jentry_index: &mut usize,
216    raw_jsonb: RawJsonb,
217) -> Result<()> {
218    let (header_type, _) = raw_jsonb.read_header(0)?;
219    if header_type == SCALAR_CONTAINER_TAG {
220        let scalar_jentry = raw_jsonb.read_jentry(4)?;
221        let range = Range {
222            start: 8,
223            end: raw_jsonb.len(),
224        };
225        let data = raw_jsonb.slice(range)?;
226        replace_jentry(buf, scalar_jentry, jentry_index);
227        buf.extend_from_slice(data);
228    } else {
229        let jentry = JEntry::make_container_jentry(raw_jsonb.len());
230        replace_jentry(buf, jentry, jentry_index);
231        buf.extend_from_slice(raw_jsonb.data);
232    }
233    Ok(())
234}
235
/// Appends `len` zero bytes to `buf` as placeholder space for JEntries and
/// returns the offset at which that space begins.
fn reserve_jentries(buf: &mut Vec<u8>, len: usize) -> usize {
    let first_slot = buf.len();
    buf.resize(first_slot + len, 0);
    first_slot
}
242
243fn replace_jentry(buf: &mut [u8], jentry: JEntry, jentry_index: &mut usize) {
244    let jentry_bytes = jentry.encoded().to_be_bytes();
245    for (i, b) in jentry_bytes.iter().enumerate() {
246        buf[*jentry_index + i] = *b;
247    }
248    *jentry_index += 4;
249}
250
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use super::ArrayBuilder;
    use super::ObjectBuilder;
    use crate::to_owned_jsonb;
    use crate::Value;

    /// Builds a `Value::Object` from `(key, value)` pairs.
    fn init_object<'a>(entries: Vec<(&str, Value<'a>)>) -> Value<'a> {
        let map: BTreeMap<_, _> = entries
            .into_iter()
            .map(|(key, val)| (key.to_string(), val))
            .collect();
        Value::Object(map)
    }

    /// An object holding a nested array must serialize to the same bytes
    /// through the builders as through the `Value` encoder.
    #[test]
    fn test_build_with_inner_array() {
        // Builder path: {"arr": [false]}
        let mut array_builder = ArrayBuilder::with_capacity(1);
        array_builder.push_owned_jsonb(to_owned_jsonb(&false).unwrap());
        let array = array_builder.build().unwrap();

        let mut object_builder = ObjectBuilder::new();
        object_builder.push_owned_jsonb("arr", array).unwrap();
        let from_builder = object_builder.build().unwrap().to_vec();

        // Encoder path for the same document.
        let expected = init_object(vec![("arr", Value::Array(vec![Value::Bool(false)]))]);
        let mut from_encoder = Vec::new();
        expected.write_to_vec(&mut from_encoder);

        assert_eq!(from_builder, from_encoder);
    }

    /// An object holding a nested object must serialize to the same bytes
    /// through the builders as through the `Value` encoder.
    #[test]
    fn test_build_with_inner_object() {
        // Builder path: {"obj": {"field": true}}
        let mut inner_builder = ObjectBuilder::new();
        inner_builder
            .push_owned_jsonb("field", to_owned_jsonb(&true).unwrap())
            .unwrap();
        let inner_obj = inner_builder.build().unwrap();

        let mut outer_builder = ObjectBuilder::new();
        outer_builder.push_owned_jsonb("obj", inner_obj).unwrap();
        let from_builder = outer_builder.build().unwrap().to_vec();

        // Encoder path for the same document.
        let inner_value = init_object(vec![("field", Value::Bool(true))]);
        let expected = init_object(vec![("obj", inner_value)]);
        let mut from_encoder = Vec::new();
        expected.write_to_vec(&mut from_encoder);

        assert_eq!(from_builder, from_encoder);
    }
}