Skip to main content

laminar_sql/datafusion/
json_types.rs

1//! JSONB type system for JSON UDF evaluation.
2//!
3//! Minimal JSONB binary access types used by the JSON scalar UDFs.
4//! Type tags are imported from the canonical definition in `laminar-core`.
5
6/// Re-export canonical JSONB binary format type tags from `laminar-core`.
7pub use laminar_core::serialization::jsonb_tags as tags;
8
9/// Returns the PostgreSQL-compatible type name for the outermost JSONB value.
10///
11/// Reads only the first byte (type tag) — O(1).
12#[must_use]
13pub fn jsonb_type_name(jsonb: &[u8]) -> Option<&'static str> {
14    Some(match *jsonb.first()? {
15        tags::NULL => "null",
16        tags::BOOL_FALSE | tags::BOOL_TRUE => "boolean",
17        tags::INT64 | tags::FLOAT64 => "number",
18        tags::STRING => "string",
19        tags::ARRAY => "array",
20        tags::OBJECT => "object",
21        _ => return None,
22    })
23}
24
25/// Access a field by name in a JSONB object.
26///
27/// Returns a byte slice pointing to the field's JSONB value,
28/// or `None` if the field does not exist or the value is not an object.
29///
30/// O(log n) binary search on sorted keys.
31#[must_use]
32pub fn jsonb_get_field<'a>(jsonb: &'a [u8], field_name: &str) -> Option<&'a [u8]> {
33    if jsonb.first()? != &tags::OBJECT {
34        return None;
35    }
36
37    let field_count = read_u32(jsonb, 1)? as usize;
38    if field_count == 0 {
39        return None;
40    }
41
42    let offset_table_start = 5;
43    let data_start = offset_table_start + field_count * 8;
44
45    let mut lo = 0usize;
46    let mut hi = field_count;
47    while lo < hi {
48        let mid = lo + (hi - lo) / 2;
49        let entry = offset_table_start + mid * 8;
50        let key_off = read_u32(jsonb, entry)? as usize;
51
52        let key_abs = data_start + key_off;
53        let key_len = read_u16(jsonb, key_abs)? as usize;
54        let key_bytes = jsonb.get(key_abs + 2..key_abs + 2 + key_len)?;
55        let key_str = std::str::from_utf8(key_bytes).ok()?;
56
57        match key_str.cmp(field_name) {
58            std::cmp::Ordering::Equal => {
59                let val_off = read_u32(jsonb, entry + 4)? as usize;
60                return jsonb.get(data_start + val_off..);
61            }
62            std::cmp::Ordering::Less => lo = mid + 1,
63            std::cmp::Ordering::Greater => hi = mid,
64        }
65    }
66    None
67}
68
69/// Get a JSONB array element by index.
70///
71/// Returns a byte slice pointing to the element's JSONB value,
72/// or `None` if the index is out of bounds or the value is not an array.
73#[must_use]
74pub fn jsonb_array_get(jsonb: &[u8], index: usize) -> Option<&[u8]> {
75    if jsonb.first()? != &tags::ARRAY {
76        return None;
77    }
78    let count = read_u32(jsonb, 1)? as usize;
79    if index >= count {
80        return None;
81    }
82    let offset_table_start = 5;
83    let data_start = offset_table_start + count * 4;
84    let entry_pos = offset_table_start + index * 4;
85    let elem_off = read_u32(jsonb, entry_pos)? as usize;
86    jsonb.get(data_start + elem_off..)
87}
88
89/// Check if a JSONB object contains a given key.
90#[must_use]
91pub fn jsonb_has_key(jsonb: &[u8], key: &str) -> bool {
92    jsonb_get_field(jsonb, key).is_some()
93}
94
95/// Convert a JSONB value slice to its text representation.
96///
97/// For strings, returns the unquoted string value.
98/// For other types, returns the JSON representation.
99#[must_use]
100pub fn jsonb_to_text(jsonb: &[u8]) -> Option<String> {
101    let tag = *jsonb.first()?;
102    match tag {
103        tags::BOOL_FALSE => Some("false".to_owned()),
104        tags::BOOL_TRUE => Some("true".to_owned()),
105        tags::INT64 => {
106            let v = i64::from_le_bytes(jsonb.get(1..9)?.try_into().ok()?);
107            Some(v.to_string())
108        }
109        tags::FLOAT64 => {
110            let v = f64::from_le_bytes(jsonb.get(1..9)?.try_into().ok()?);
111            Some(v.to_string())
112        }
113        tags::STRING => {
114            let len = read_u32(jsonb, 1)? as usize;
115            Some(std::str::from_utf8(jsonb.get(5..5 + len)?).ok()?.to_owned())
116        }
117        tags::ARRAY | tags::OBJECT => jsonb_to_json_string(jsonb),
118        // NULL and unknown tags return None (PostgreSQL returns NULL for null)
119        _ => None,
120    }
121}
122
123/// Convert a JSONB value to a JSON string representation.
124fn jsonb_to_json_string(jsonb: &[u8]) -> Option<String> {
125    let tag = *jsonb.first()?;
126    Some(match tag {
127        tags::NULL => "null".to_owned(),
128        tags::BOOL_FALSE => "false".to_owned(),
129        tags::BOOL_TRUE => "true".to_owned(),
130        tags::INT64 => {
131            let v = i64::from_le_bytes(jsonb.get(1..9)?.try_into().ok()?);
132            v.to_string()
133        }
134        tags::FLOAT64 => {
135            let v = f64::from_le_bytes(jsonb.get(1..9)?.try_into().ok()?);
136            v.to_string()
137        }
138        tags::STRING => {
139            let len = read_u32(jsonb, 1)? as usize;
140            let s = std::str::from_utf8(jsonb.get(5..5 + len)?).ok()?;
141            format!("\"{s}\"")
142        }
143        tags::ARRAY => {
144            let count = read_u32(jsonb, 1)? as usize;
145            let mut parts = Vec::with_capacity(count);
146            for i in 0..count {
147                let elem = jsonb_array_get(jsonb, i)?;
148                parts.push(jsonb_to_json_string(elem)?);
149            }
150            format!("[{}]", parts.join(","))
151        }
152        tags::OBJECT => {
153            let count = read_u32(jsonb, 1)? as usize;
154            let offset_table_start = 5;
155            let data_start = offset_table_start + count * 8;
156            let mut parts = Vec::with_capacity(count);
157            for i in 0..count {
158                let entry = offset_table_start + i * 8;
159                let key_off = read_u32(jsonb, entry)? as usize;
160                let key_abs = data_start + key_off;
161                let key_len = read_u16(jsonb, key_abs)? as usize;
162                let key =
163                    std::str::from_utf8(jsonb.get(key_abs + 2..key_abs + 2 + key_len)?).ok()?;
164                let val_off = read_u32(jsonb, entry + 4)? as usize;
165                let val_slice = jsonb.get(data_start + val_off..)?;
166                parts.push(format!("\"{}\":{}", key, jsonb_to_json_string(val_slice)?));
167            }
168            format!("{{{}}}", parts.join(","))
169        }
170        _ => return None,
171    })
172}
173
174/// Convert a JSONB binary value to a `serde_json::Value`.
175///
176/// Recursively decodes the JSONB binary format into the equivalent
177/// `serde_json::Value`, avoiding the text round-trip through JSON strings.
178///
179/// Returns `None` if the JSONB bytes are malformed.
180#[must_use]
181#[allow(clippy::cast_possible_truncation)]
182pub fn jsonb_to_value(jsonb: &[u8]) -> Option<serde_json::Value> {
183    let tag = *jsonb.first()?;
184    Some(match tag {
185        tags::NULL => serde_json::Value::Null,
186        tags::BOOL_FALSE => serde_json::Value::Bool(false),
187        tags::BOOL_TRUE => serde_json::Value::Bool(true),
188        tags::INT64 => {
189            let v = i64::from_le_bytes(jsonb.get(1..9)?.try_into().ok()?);
190            serde_json::Value::Number(v.into())
191        }
192        tags::FLOAT64 => {
193            let v = f64::from_le_bytes(jsonb.get(1..9)?.try_into().ok()?);
194            serde_json::Value::Number(serde_json::Number::from_f64(v)?)
195        }
196        tags::STRING => {
197            let len = read_u32(jsonb, 1)? as usize;
198            let s = std::str::from_utf8(jsonb.get(5..5 + len)?).ok()?;
199            serde_json::Value::String(s.to_owned())
200        }
201        tags::ARRAY => {
202            let count = read_u32(jsonb, 1)? as usize;
203            let mut arr = Vec::with_capacity(count);
204            for i in 0..count {
205                let elem = jsonb_array_get(jsonb, i)?;
206                arr.push(jsonb_to_value(elem)?);
207            }
208            serde_json::Value::Array(arr)
209        }
210        tags::OBJECT => {
211            let count = read_u32(jsonb, 1)? as usize;
212            let offset_table_start = 5;
213            let data_start = offset_table_start + count * 8;
214            let mut map = serde_json::Map::with_capacity(count);
215            for i in 0..count {
216                let entry = offset_table_start + i * 8;
217                let key_off = read_u32(jsonb, entry)? as usize;
218                let key_abs = data_start + key_off;
219                let key_len = read_u16(jsonb, key_abs)? as usize;
220                let key =
221                    std::str::from_utf8(jsonb.get(key_abs + 2..key_abs + 2 + key_len)?).ok()?;
222                let val_off = read_u32(jsonb, entry + 4)? as usize;
223                let val_slice = jsonb.get(data_start + val_off..)?;
224                map.insert(key.to_owned(), jsonb_to_value(val_slice)?);
225            }
226            serde_json::Value::Object(map)
227        }
228        _ => return None,
229    })
230}
231
232/// Check whether JSONB `left` contains `right` (PostgreSQL `@>` semantics).
233///
234/// An object contains another if every key in `right` exists in `left`
235/// with a matching value. An array contains another if it's a superset.
236/// Scalars match by equality.
237#[must_use]
238pub fn jsonb_contains(left: &[u8], right: &[u8]) -> Option<bool> {
239    let lt = *left.first()?;
240    let rt = *right.first()?;
241
242    if lt != rt {
243        return Some(false);
244    }
245
246    Some(match lt {
247        tags::NULL | tags::BOOL_FALSE | tags::BOOL_TRUE => true, // tags already matched
248        tags::INT64 | tags::FLOAT64 => left.get(1..9)? == right.get(1..9)?,
249        tags::STRING => {
250            let l_len = read_u32(left, 1)? as usize;
251            let r_len = read_u32(right, 1)? as usize;
252            l_len == r_len && left.get(5..5 + l_len)? == right.get(5..5 + r_len)?
253        }
254        tags::OBJECT => {
255            // Every key in right must exist in left with a contained value.
256            let r_count = read_u32(right, 1)? as usize;
257            let r_offset_table = 5;
258            let r_data_start = r_offset_table + r_count * 8;
259            for i in 0..r_count {
260                let entry = r_offset_table + i * 8;
261                let key_off = read_u32(right, entry)? as usize;
262                let key_abs = r_data_start + key_off;
263                let key_len = read_u16(right, key_abs)? as usize;
264                let key =
265                    std::str::from_utf8(right.get(key_abs + 2..key_abs + 2 + key_len)?).ok()?;
266
267                let val_off = read_u32(right, entry + 4)? as usize;
268                let r_val = right.get(r_data_start + val_off..)?;
269
270                match jsonb_get_field(left, key) {
271                    Some(l_val) => {
272                        if jsonb_contains(l_val, r_val) != Some(true) {
273                            return Some(false);
274                        }
275                    }
276                    None => return Some(false), // key not found
277                }
278            }
279            true
280        }
281        tags::ARRAY => {
282            // Every element in right must exist somewhere in left.
283            let r_count = read_u32(right, 1)? as usize;
284            let l_count = read_u32(left, 1)? as usize;
285            'outer: for ri in 0..r_count {
286                let r_elem = jsonb_array_get(right, ri)?;
287                for li in 0..l_count {
288                    let l_elem = jsonb_array_get(left, li)?;
289                    if jsonb_contains(l_elem, r_elem) == Some(true) {
290                        continue 'outer;
291                    }
292                }
293                return Some(false);
294            }
295            true
296        }
297        _ => false,
298    })
299}
300
301/// Encode a `serde_json::Value` into JSONB binary format.
302///
303/// Used by `json_build_object`, `json_build_array`, `to_jsonb` etc.
304#[must_use]
305#[allow(clippy::cast_possible_truncation)]
306pub fn encode_jsonb(value: &serde_json::Value) -> Vec<u8> {
307    let mut buf = Vec::with_capacity(256);
308    encode_jsonb_into(value, &mut buf);
309    buf
310}
311
312/// Encode a JSON value into the given buffer.
313#[allow(clippy::cast_possible_truncation)]
314pub fn encode_jsonb_into(value: &serde_json::Value, buf: &mut Vec<u8>) {
315    match value {
316        serde_json::Value::Null => buf.push(tags::NULL),
317        serde_json::Value::Bool(false) => buf.push(tags::BOOL_FALSE),
318        serde_json::Value::Bool(true) => buf.push(tags::BOOL_TRUE),
319        serde_json::Value::Number(n) => {
320            if let Some(i) = n.as_i64() {
321                buf.push(tags::INT64);
322                buf.extend_from_slice(&i.to_le_bytes());
323            } else if let Some(f) = n.as_f64() {
324                buf.push(tags::FLOAT64);
325                buf.extend_from_slice(&f.to_le_bytes());
326            }
327        }
328        serde_json::Value::String(s) => {
329            buf.push(tags::STRING);
330            buf.extend_from_slice(&(s.len() as u32).to_le_bytes());
331            buf.extend_from_slice(s.as_bytes());
332        }
333        serde_json::Value::Array(arr) => {
334            buf.push(tags::ARRAY);
335            buf.extend_from_slice(&(arr.len() as u32).to_le_bytes());
336            let offset_table_pos = buf.len();
337            buf.resize(buf.len() + arr.len() * 4, 0);
338            let data_start = buf.len();
339            for (i, elem) in arr.iter().enumerate() {
340                let elem_offset = (buf.len() - data_start) as u32;
341                let entry_pos = offset_table_pos + i * 4;
342                buf[entry_pos..entry_pos + 4].copy_from_slice(&elem_offset.to_le_bytes());
343                encode_jsonb_into(elem, buf);
344            }
345        }
346        serde_json::Value::Object(obj) => {
347            buf.push(tags::OBJECT);
348            let mut keys: Vec<&String> = obj.keys().collect();
349            keys.sort();
350            buf.extend_from_slice(&(keys.len() as u32).to_le_bytes());
351            let offset_table_pos = buf.len();
352            buf.resize(buf.len() + keys.len() * 8, 0);
353            let data_start = buf.len();
354            for (i, key) in keys.iter().enumerate() {
355                let key_offset = (buf.len() - data_start) as u32;
356                let entry_pos = offset_table_pos + i * 8;
357                buf[entry_pos..entry_pos + 4].copy_from_slice(&key_offset.to_le_bytes());
358                buf.extend_from_slice(&(key.len() as u16).to_le_bytes());
359                buf.extend_from_slice(key.as_bytes());
360                let val_offset = (buf.len() - data_start) as u32;
361                buf[entry_pos + 4..entry_pos + 8].copy_from_slice(&val_offset.to_le_bytes());
362                encode_jsonb_into(&obj[*key], buf);
363            }
364        }
365    }
366}
367
368// ── Helpers ──────────────────────────────────────────────────────
369
/// Reads a little-endian `u32` from `buf` starting at byte `offset`.
///
/// Returns `None` when fewer than four bytes are available at `offset`,
/// including the case where `offset + 4` would overflow `usize` (offsets are
/// derived from untrusted input, so the addition must not panic or wrap).
#[inline]
fn read_u32(buf: &[u8], offset: usize) -> Option<u32> {
    let end = offset.checked_add(4)?;
    let bytes: [u8; 4] = buf.get(offset..end)?.try_into().ok()?;
    Some(u32::from_le_bytes(bytes))
}
376
/// Reads a little-endian `u16` from `buf` starting at byte `offset`.
///
/// Returns `None` when fewer than two bytes are available at `offset`,
/// including the case where `offset + 2` would overflow `usize` (offsets are
/// derived from untrusted input, so the addition must not panic or wrap).
#[inline]
fn read_u16(buf: &[u8], offset: usize) -> Option<u16> {
    let end = offset.checked_add(2)?;
    let bytes: [u8; 2] = buf.get(offset..end)?.try_into().ok()?;
    Some(u16::from_le_bytes(bytes))
}
383
#[cfg(test)]
mod tests {
    //! Unit tests for the JSONB helpers. Inputs are produced with
    //! `encode_jsonb`, so these tests also exercise the encoder.

    use super::*;
    use serde_json::json;

    /// Shorthand for encoding a JSON value to JSONB bytes.
    fn enc(v: &serde_json::Value) -> Vec<u8> {
        encode_jsonb(v)
    }

    /// Every JSON kind maps to its type name; empty and unknown input is `None`.
    #[test]
    fn test_type_name() {
        assert_eq!(jsonb_type_name(&enc(&json!(null))), Some("null"));
        assert_eq!(jsonb_type_name(&enc(&json!(true))), Some("boolean"));
        assert_eq!(jsonb_type_name(&enc(&json!(false))), Some("boolean"));
        assert_eq!(jsonb_type_name(&enc(&json!(42))), Some("number"));
        assert_eq!(jsonb_type_name(&enc(&json!(3.125))), Some("number"));
        assert_eq!(jsonb_type_name(&enc(&json!("hi"))), Some("string"));
        assert_eq!(jsonb_type_name(&enc(&json!([1]))), Some("array"));
        assert_eq!(jsonb_type_name(&enc(&json!({"a": 1}))), Some("object"));
        assert_eq!(jsonb_type_name(&[]), None);
        assert_eq!(jsonb_type_name(&[0xFF]), None);
    }

    /// Field lookup finds present keys and reports missing ones as `None`.
    #[test]
    fn test_get_field() {
        let b = enc(&json!({"name": "Alice", "age": 30}));
        let name = jsonb_get_field(&b, "name").unwrap();
        assert_eq!(jsonb_to_text(name), Some("Alice".to_owned()));
        let age = jsonb_get_field(&b, "age").unwrap();
        assert_eq!(jsonb_to_text(age), Some("30".to_owned()));
        assert!(jsonb_get_field(&b, "missing").is_none());
    }

    /// Array indexing returns the element, or `None` past the end.
    #[test]
    fn test_array_get() {
        let b = enc(&json!([10, 20, 30]));
        let e1 = jsonb_array_get(&b, 1).unwrap();
        assert_eq!(jsonb_to_text(e1), Some("20".to_owned()));
        assert!(jsonb_array_get(&b, 5).is_none());
    }

    #[test]
    fn test_has_key() {
        let b = enc(&json!({"a": 1, "b": 2}));
        assert!(jsonb_has_key(&b, "a"));
        assert!(!jsonb_has_key(&b, "c"));
    }

    /// Strings are rendered without surrounding quotes.
    #[test]
    fn test_to_text_string() {
        let b = enc(&json!("hello"));
        assert_eq!(jsonb_to_text(&b), Some("hello".to_owned()));
    }

    /// JSON null has no text form.
    #[test]
    fn test_to_text_null() {
        let b = enc(&json!(null));
        assert_eq!(jsonb_to_text(&b), None);
    }

    #[test]
    fn test_to_text_object() {
        let b = enc(&json!({"a": 1}));
        assert_eq!(jsonb_to_text(&b), Some("{\"a\":1}".to_owned()));
    }

    #[test]
    fn test_to_text_array() {
        let b = enc(&json!([1, "two"]));
        assert_eq!(jsonb_to_text(&b), Some("[1,\"two\"]".to_owned()));
    }

    /// An object contains any object made of a subset of its fields.
    #[test]
    fn test_contains_object() {
        let left = enc(&json!({"a": 1, "b": 2, "c": 3}));
        let right = enc(&json!({"a": 1, "c": 3}));
        assert_eq!(jsonb_contains(&left, &right), Some(true));
    }

    /// A key present only on the right-hand side defeats containment.
    #[test]
    fn test_contains_object_false() {
        let left = enc(&json!({"a": 1}));
        let right = enc(&json!({"a": 1, "b": 2}));
        assert_eq!(jsonb_contains(&left, &right), Some(false));
    }

    #[test]
    fn test_contains_array() {
        let left = enc(&json!([1, 2, 3]));
        let right = enc(&json!([1, 3]));
        assert_eq!(jsonb_contains(&left, &right), Some(true));
    }

    #[test]
    fn test_contains_scalar() {
        let a = enc(&json!(42));
        let b = enc(&json!(42));
        let c = enc(&json!(99));
        assert_eq!(jsonb_contains(&a, &b), Some(true));
        assert_eq!(jsonb_contains(&a, &c), Some(false));
    }

    /// Values of different JSON types never contain one another.
    #[test]
    fn test_contains_type_mismatch() {
        let a = enc(&json!(42));
        let b = enc(&json!("42"));
        assert_eq!(jsonb_contains(&a, &b), Some(false));
    }

    /// Lookups chain through nested objects.
    #[test]
    fn test_nested_get() {
        let b = enc(&json!({"user": {"address": {"city": "London"}}}));
        let user = jsonb_get_field(&b, "user").unwrap();
        let addr = jsonb_get_field(user, "address").unwrap();
        let city = jsonb_get_field(addr, "city").unwrap();
        assert_eq!(jsonb_to_text(city), Some("London".to_owned()));
    }

    /// Encoding then decoding must give back the original value, both through
    /// the binary decoder and through the JSON text rendering.
    #[test]
    fn test_encode_decode_roundtrip() {
        let vals = vec![
            json!(null),
            json!(true),
            json!(false),
            json!(42),
            json!(3.125),
            json!("hello"),
            json!([1, "two", null]),
            json!({"key": "value", "num": 42}),
        ];
        for v in vals {
            let b = enc(&v);

            assert_eq!(
                jsonb_to_value(&b).as_ref(),
                Some(&v),
                "binary round-trip changed {v:?}"
            );

            let text = jsonb_to_json_string(&b);
            assert!(text.is_some(), "Failed to round-trip: {v:?}");
            let reparsed = text.and_then(|t| serde_json::from_str::<serde_json::Value>(&t).ok());
            assert_eq!(
                reparsed.as_ref(),
                Some(&v),
                "text round-trip changed {v:?}"
            );
        }
    }

    #[test]
    fn test_jsonb_to_value_scalars() {
        assert_eq!(jsonb_to_value(&enc(&json!(null))), Some(json!(null)));
        assert_eq!(jsonb_to_value(&enc(&json!(true))), Some(json!(true)));
        assert_eq!(jsonb_to_value(&enc(&json!(false))), Some(json!(false)));
        assert_eq!(jsonb_to_value(&enc(&json!(42))), Some(json!(42)));
        assert_eq!(jsonb_to_value(&enc(&json!(3.125))), Some(json!(3.125)));
        assert_eq!(jsonb_to_value(&enc(&json!("hello"))), Some(json!("hello")));
    }

    #[test]
    fn test_jsonb_to_value_complex() {
        let obj = json!({"a": 1, "b": [2, 3], "c": {"d": true}});
        let bytes = enc(&obj);
        assert_eq!(jsonb_to_value(&bytes), Some(obj));
    }

    /// Empty input and unknown tags decode to `None`.
    #[test]
    fn test_jsonb_to_value_empty() {
        assert_eq!(jsonb_to_value(&[]), None);
        assert_eq!(jsonb_to_value(&[0xFF]), None);
    }
}
543}