mod blake3_prefix_mac;
mod mac;
mod tokenized_selector;
use std::{collections::HashMap, marker::PhantomData};
use blake3_prefix_mac::Blake3PrefixMac;
use cllw_ore::{CllwOreEncrypt, OreCllw8V1, OreCllw8VariableV1};
use mac::{JsonbMacTerm, Mac, Macca};
use serde_json::Value;
use tokenized_selector::TokenizedSelector;
use crate::{
ejsonpath::{DotArg, IndexArg, Selector},
zerokms::IndexKey,
};
use super::ore_indexer::conversion::IntoOrePlaintext;
/// Walks a `serde_json::Value` tree and produces an [`IndexedValue`] tree of
/// searchable terms (MAC terms and ORE terms) for every node.
///
/// The type parameter `T` selects the payload carried on each indexed node;
/// it is produced from the node through `Payload<T>: From<&Value>`.
pub struct JsonbIndexer<T> {
/// Key passed to the ORE `encrypt` calls and used to construct `macca`.
index_key: IndexKey,
/// MAC instance used for tokenized selectors and MAC terms.
macca: Blake3PrefixMac,
/// Ties the indexer to its payload type without storing a `T`.
_payload: PhantomData<T>,
}
/// Errors that can be returned while indexing a JSON value.
#[derive(Debug, thiserror::Error)]
pub enum JsonbIndexerError {
/// An error reported by `cllw_ore`, forwarded transparently. The indexer
/// produces it via `?` on the ORE `encrypt` calls.
#[error(transparent)]
CllwOre(#[from] cllw_ore::Error),
}
/// Newtype around the per-node payload `T`. It exists so that conversions from
/// `&Value` can be implemented per payload type (`Value` keeps a copy of the
/// node, `()` keeps nothing).
pub struct Payload<T>(T);
impl<'a> From<&'a Value> for Payload<Value> {
fn from(value: &'a Value) -> Self {
Payload(value.clone())
}
}
impl<'a> From<&'a Value> for Payload<()> {
fn from(_: &'a Value) -> Self {
Payload(())
}
}
impl<T> JsonbIndexer<T>
where
Payload<T>: for<'a> From<&'a Value>,
{
pub fn new(index_key: IndexKey, prefix: Vec<u8>) -> Self {
Self {
macca: Blake3PrefixMac::new(&index_key, prefix),
index_key,
_payload: PhantomData,
}
}
pub fn index(&mut self, value: Value) -> Result<IndexedValue<T>, JsonbIndexerError> {
self.index_value(Selector::Root, value)
}
fn index_value(
&mut self,
selector: Selector,
value: Value,
) -> Result<IndexedValue<T>, JsonbIndexerError> {
let payload = Payload::<T>::from(&value).0;
let tokenized_selector = self.tokenized_selector(&selector);
match value {
Value::Null => Ok(IndexedValue::Null(
tokenized_selector,
payload,
IndexedNull {
mac_term: self.mac_term(&selector, &JsonbMacTerm::Null),
},
)),
Value::Bool(b) => Ok(IndexedValue::Bool(
tokenized_selector,
payload,
IndexedBool {
ore_term: (if b { 1u32 } else { 0u32 })
.encrypt(&(*self.index_key.key()).into())?,
},
)),
Value::Number(n) => Ok(IndexedValue::Number(
tokenized_selector,
payload,
IndexedNumber {
ore_term: n
.as_f64()
.unwrap()
.to_ore()
.inner()
.encrypt(&(*self.index_key.key()).into())?,
},
)),
Value::String(s) => Ok(IndexedValue::String(
tokenized_selector,
payload,
IndexedString {
ore_term: s.encrypt(&(*self.index_key.key()).into())?,
match_term: None,
},
)),
Value::Array(a) => Ok(IndexedValue::Array(
tokenized_selector,
payload,
IndexedArray {
mac_term: self.mac_term(&selector, &JsonbMacTerm::Array),
elements: a
.into_iter()
.enumerate()
.map(|(idx, item)| {
let item_selector =
Selector::Index(Box::new(selector.clone()), IndexArg::Number(idx));
self.index_value(item_selector, item)
})
.collect::<Result<Vec<_>, _>>()?,
},
)),
Value::Object(o) => Ok(IndexedValue::Object(
tokenized_selector,
payload,
IndexedObject {
mac_term: self.mac_term(&selector, &JsonbMacTerm::Object),
fields: o
.into_iter()
.map(|(key, val)| -> Result<_, JsonbIndexerError> {
let item_selector = Selector::Dot(
Box::new(selector.clone()),
DotArg::Field(key.clone()),
);
Ok((item_selector.clone(), self.index_value(item_selector, val)?))
})
.collect::<Result<HashMap<_, _>, _>>()?,
},
)),
}
}
fn tokenized_selector(&mut self, selector: &Selector) -> TokenizedSelector {
selector.mac(&mut self.macca);
TokenizedSelector(self.macca.finalize_reset())
}
fn mac_term(&mut self, selector: &Selector, term: &JsonbMacTerm) -> [u8; 16] {
selector.mac(&mut self.macca);
term.mac(&mut self.macca);
self.macca.finalize_reset()
}
}
/// An indexed JSON node. Every variant carries, in order: the tokenized
/// selector of the node, the payload `T`, and the index terms specific to the
/// node's JSON type.
#[derive(Debug)]
pub enum IndexedValue<T> {
/// JSON `null`.
Null(TokenizedSelector, T, IndexedNull),
/// JSON boolean.
Bool(TokenizedSelector, T, IndexedBool),
/// JSON number.
Number(TokenizedSelector, T, IndexedNumber),
/// JSON string.
String(TokenizedSelector, T, IndexedString),
/// JSON array, including its indexed elements.
Array(TokenizedSelector, T, IndexedArray<T>),
/// JSON object, including its indexed fields.
Object(TokenizedSelector, T, IndexedObject<T>),
}
impl<T> IndexedValue<T> {
pub fn tokenized_selector(&self) -> &TokenizedSelector {
match self {
IndexedValue::Null(ts, _, _)
| IndexedValue::Bool(ts, _, _)
| IndexedValue::Number(ts, _, _)
| IndexedValue::String(ts, _, _)
| IndexedValue::Array(ts, _, _)
| IndexedValue::Object(ts, _, _) => ts,
}
}
pub fn payload(&self) -> &T {
match self {
IndexedValue::Null(_, payload, _) => payload,
IndexedValue::Bool(_, payload, _) => payload,
IndexedValue::Number(_, payload, _) => payload,
IndexedValue::String(_, payload, _) => payload,
IndexedValue::Array(_, payload, _) => payload,
IndexedValue::Object(_, payload, _) => payload,
}
}
pub fn into_payload(self) -> T {
match self {
IndexedValue::Null(_, payload, _) => payload,
IndexedValue::Bool(_, payload, _) => payload,
IndexedValue::Number(_, payload, _) => payload,
IndexedValue::String(_, payload, _) => payload,
IndexedValue::Array(_, payload, _) => payload,
IndexedValue::Object(_, payload, _) => payload,
}
}
pub fn map<U, F, E>(self, mapper: F) -> Result<IndexedValue<U>, E>
where
F: Fn(T) -> Result<U, E>,
{
match self {
IndexedValue::Null(ts, payload, indexed) => {
Ok(IndexedValue::Null(ts, mapper(payload)?, indexed))
}
IndexedValue::Bool(ts, payload, indexed) => {
Ok(IndexedValue::Bool(ts, mapper(payload)?, indexed))
}
IndexedValue::Number(ts, payload, indexed) => {
Ok(IndexedValue::Number(ts, mapper(payload)?, indexed))
}
IndexedValue::String(ts, payload, indexed) => {
Ok(IndexedValue::String(ts, mapper(payload)?, indexed))
}
IndexedValue::Array(ts, payload, indexed) => Ok(IndexedValue::Array(
ts,
mapper(payload)?,
indexed.map(mapper)?,
)),
IndexedValue::Object(ts, payload, indexed) => Ok(IndexedValue::Object(
ts,
mapper(payload)?,
indexed.map(mapper)?,
)),
}
}
}
impl<T> IndexedArray<T> {
    /// Maps the payloads of all elements (recursively) with `mapper`, keeping
    /// the array's MAC term. Stops at, and returns, the first error.
    pub fn map<U, F, E>(self, mapper: F) -> Result<IndexedArray<U>, E>
    where
        F: Fn(T) -> Result<U, E>,
    {
        let IndexedArray { mac_term, elements } = self;
        let mut mapped = Vec::with_capacity(elements.len());
        for element in elements {
            mapped.push(element.map(&mapper)?);
        }
        Ok(IndexedArray {
            mac_term,
            elements: mapped,
        })
    }
}
impl<T> IndexedObject<T> {
    /// Maps the payloads of all field values (recursively) with `mapper`,
    /// keeping the field selectors and the object's MAC term. Stops at, and
    /// returns, the first error.
    pub fn map<U, F, E>(self, mapper: F) -> Result<IndexedObject<U>, E>
    where
        F: Fn(T) -> Result<U, E>,
    {
        let IndexedObject { mac_term, fields } = self;
        let mut mapped = HashMap::with_capacity(fields.len());
        for (selector, value) in fields {
            mapped.insert(selector, value.map(&mapper)?);
        }
        Ok(IndexedObject {
            mac_term,
            fields: mapped,
        })
    }
}
/// Index terms for a JSON `null` node.
#[derive(Debug)]
pub struct IndexedNull {
/// MAC over the node's selector followed by the "null" MAC term.
pub mac_term: [u8; 16],
}
/// Index terms for a JSON boolean node.
#[derive(Debug)]
pub struct IndexedBool {
/// ORE term; the indexer encrypts `true` as `1u32` and `false` as `0u32`.
pub ore_term: OreCllw8V1<32>,
}
/// Index terms for a JSON number node.
#[derive(Debug)]
pub struct IndexedNumber {
/// ORE term; the indexer derives it from the number's `f64` representation.
pub ore_term: OreCllw8V1<64>,
}
/// Index terms for a JSON string node.
#[derive(Debug)]
pub struct IndexedString {
/// Variable-length ORE term produced by encrypting the string.
pub ore_term: OreCllw8VariableV1,
/// Optional match term. The indexer in this file always sets it to `None`,
/// and it takes no part in the equality/ordering of `IndexedString`.
pub match_term: Option<Vec<u16>>,
}
/// Index terms for a JSON array node.
#[derive(Debug)]
pub struct IndexedArray<T> {
/// MAC over the array's selector followed by the "array" MAC term.
pub mac_term: [u8; 16],
/// Indexed elements, in the order they appear in the source array.
pub elements: Vec<IndexedValue<T>>,
}
/// Index terms for a JSON object node.
#[derive(Debug)]
pub struct IndexedObject<T> {
/// MAC over the object's selector followed by the "object" MAC term.
pub mac_term: [u8; 16],
/// Indexed field values, keyed by the full selector of each field (the
/// object's own selector extended with the field name).
pub fields: HashMap<Selector, IndexedValue<T>>,
}
/// Containment test between indexed JSON values.
///
/// `Rhs` is the type of the value being looked for; it defaults to `Self`.
pub trait JsonbContains<Rhs = Self> {
/// Returns `true` when `self` contains `other`.
fn jsonb_contains(&self, other: &Rhs) -> bool;
}
/// Two indexed nulls are equal when their MAC terms are equal.
impl PartialEq for IndexedNull {
    fn eq(&self, rhs: &Self) -> bool {
        let Self { mac_term: ours } = self;
        let Self { mac_term: theirs } = rhs;
        ours == theirs
    }
}

impl Eq for IndexedNull {}
/// Equality of indexed bools is equality of their ORE terms.
impl PartialEq for IndexedBool {
    fn eq(&self, rhs: &Self) -> bool {
        self.ore_term == rhs.ore_term
    }
}

impl Eq for IndexedBool {}

/// Ordering of indexed bools is the ordering of their ORE terms.
impl Ord for IndexedBool {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        self.ore_term.cmp(&rhs.ore_term)
    }
}

/// Total order: always defers to [`Ord::cmp`].
impl PartialOrd for IndexedBool {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}
/// Compares an indexed value (with any payload) against a payload-less one.
///
/// Only the type-specific index terms are compared; tokenized selectors and
/// payloads are ignored. Values of different JSON types are never equal.
impl<T> PartialEq<IndexedValue<()>> for IndexedValue<T> {
    fn eq(&self, other: &IndexedValue<()>) -> bool {
        match (self, other) {
            (Self::Null(.., ours), IndexedValue::Null(.., theirs)) => ours == theirs,
            (Self::Bool(.., ours), IndexedValue::Bool(.., theirs)) => ours == theirs,
            (Self::Number(.., ours), IndexedValue::Number(.., theirs)) => ours == theirs,
            (Self::String(.., ours), IndexedValue::String(.., theirs)) => ours == theirs,
            (Self::Array(.., ours), IndexedValue::Array(.., theirs)) => ours == theirs,
            (Self::Object(.., ours), IndexedValue::Object(.., theirs)) => ours == theirs,
            _ => false,
        }
    }
}
/// Two indexed objects are equal when they have the same set of field
/// selectors and the values stored under each selector are equal.
///
/// The object's own `mac_term` is not part of the comparison.
impl<T> PartialEq<IndexedObject<()>> for IndexedObject<T> {
    fn eq(&self, other: &IndexedObject<()>) -> bool {
        // Fields are looked up by key rather than compared pairwise in
        // iteration order: `HashMap` iteration order is arbitrary and can
        // differ between two maps holding exactly the same entries.
        self.fields.len() == other.fields.len()
            && self.fields.iter().all(|(selector, lhs)| {
                matches!(other.fields.get(selector), Some(rhs) if lhs == rhs)
            })
    }
}
/// Two indexed arrays are equal when they have the same length and their
/// elements are pairwise equal, in order. The array's `mac_term` is ignored.
impl<T> PartialEq<IndexedArray<()>> for IndexedArray<T> {
    fn eq(&self, other: &IndexedArray<()>) -> bool {
        let (ours, theirs) = (&self.elements, &other.elements);
        ours.len() == theirs.len() && ours.iter().zip(theirs).all(|(lhs, rhs)| lhs == rhs)
    }
}
/// Equality of indexed numbers is equality of their ORE terms.
impl PartialEq for IndexedNumber {
    fn eq(&self, rhs: &Self) -> bool {
        self.ore_term == rhs.ore_term
    }
}

impl Eq for IndexedNumber {}

/// Ordering of indexed numbers is the ordering of their ORE terms.
impl Ord for IndexedNumber {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        self.ore_term.cmp(&rhs.ore_term)
    }
}

/// Total order: always defers to [`Ord::cmp`].
impl PartialOrd for IndexedNumber {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}
/// Equality of indexed strings is equality of their ORE terms; `match_term`
/// does not take part in the comparison.
impl PartialEq for IndexedString {
    fn eq(&self, rhs: &Self) -> bool {
        self.ore_term == rhs.ore_term
    }
}

impl Eq for IndexedString {}

/// Ordering of indexed strings is the ordering of their ORE terms.
impl Ord for IndexedString {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        self.ore_term.cmp(&rhs.ore_term)
    }
}

/// Total order: always defers to [`Ord::cmp`].
impl PartialOrd for IndexedString {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, rhs))
    }
}
/// Containment for any indexed value.
///
/// Arrays and objects delegate to their own containment rules. A primitive
/// contains `other` only when `other` is a primitive of the same JSON type
/// with equal terms.
impl<T> JsonbContains<IndexedValue<()>> for IndexedValue<T> {
    fn jsonb_contains(&self, other: &IndexedValue<()>) -> bool {
        match self {
            Self::Array(.., array) => array.jsonb_contains(other),
            Self::Object(.., object) => object.jsonb_contains(other),
            Self::Null(.., ours) => {
                matches!(other, IndexedValue::Null(.., theirs) if ours == theirs)
            }
            Self::Bool(.., ours) => {
                matches!(other, IndexedValue::Bool(.., theirs) if ours == theirs)
            }
            Self::Number(.., ours) => {
                matches!(other, IndexedValue::Number(.., theirs) if ours == theirs)
            }
            Self::String(.., ours) => {
                matches!(other, IndexedValue::String(.., theirs) if ours == theirs)
            }
        }
    }
}
/// Containment rules for an indexed array.
///
/// * A primitive is contained when some element equals it.
/// * An array is contained when each of its elements is contained: a nested
///   array must be contained by some element of `self`, anything else must be
///   contained by `self` as a whole.
/// * An object is contained when some object element of `self` contains it.
impl<T> JsonbContains<IndexedValue<()>> for IndexedArray<T> {
    fn jsonb_contains(&self, other: &IndexedValue<()>) -> bool {
        match other {
            IndexedValue::Array(.., query) => query.elements.iter().all(|wanted| match wanted {
                IndexedValue::Array(..) => self
                    .elements
                    .iter()
                    .any(|candidate| candidate.jsonb_contains(wanted)),
                _ => self.jsonb_contains(wanted),
            }),
            IndexedValue::Object(..) => self.elements.iter().any(|candidate| {
                matches!(
                    candidate,
                    IndexedValue::Object(.., object) if object.jsonb_contains(other)
                )
            }),
            IndexedValue::Null(..)
            | IndexedValue::Bool(..)
            | IndexedValue::Number(..)
            | IndexedValue::String(..) => self.elements.iter().any(|candidate| candidate == other),
        }
    }
}
/// Containment rules for an indexed object.
///
/// Only another object can be contained: every field of the queried object
/// must exist in `self` under the same selector, and the value stored there
/// must contain the queried value. Primitives and arrays are never contained.
impl<T> JsonbContains<IndexedValue<()>> for IndexedObject<T> {
    fn jsonb_contains(&self, other: &IndexedValue<()>) -> bool {
        match other {
            IndexedValue::Object(.., query) => query.fields.iter().all(|(selector, wanted)| {
                self.fields
                    .get(selector)
                    .map_or(false, |found| found.jsonb_contains(wanted))
            }),
            IndexedValue::Null(..)
            | IndexedValue::Bool(..)
            | IndexedValue::Number(..)
            | IndexedValue::String(..)
            | IndexedValue::Array(..) => false,
        }
    }
}
// Unit tests for `JsonbContains` on values produced by `JsonbIndexer`.
// Every indexer here is built with the same all-zero key and the same prefix,
// so terms produced by different indexer instances are comparable.
#[cfg(test)]
mod test {
use serde_json::{json, Value};
use crate::encryption::jsonb_indexer::JsonbContains;
use super::JsonbIndexer;
// `{}` is contained by `{}` and by a non-empty object. Uses a `Value`-payload
// indexer for the source and a `()`-payload indexer for the query.
#[test]
fn every_object_contains_the_empty_object() {
let mut source_indexer: JsonbIndexer<Value> =
JsonbIndexer::new([0; 32].into(), vec![1; 32]);
let mut query_indexer: JsonbIndexer<()> = JsonbIndexer::new([0; 32].into(), vec![1; 32]);
let source = source_indexer.index(json!({})).unwrap();
let query = query_indexer.index(json!({})).unwrap();
assert!(source.jsonb_contains(&query));
let source = source_indexer.index(json!({"a": 123})).unwrap();
assert!(source.jsonb_contains(&query));
}
// A nested object is contained only when both the nested key and its value match.
#[test]
fn nested_object_containment() {
// The payload type is inferred as `()` because the query is passed to `jsonb_contains`.
let mut indexer = JsonbIndexer::new([0; 32].into(), vec![1; 32]);
let source = indexer
.index(json!({
"a": {
"b": 1
}
}))
.unwrap();
let query = indexer.index(json!({"a": {"b": 1}})).unwrap();
assert!(source.jsonb_contains(&query));
let query = indexer.index(json!({"a": {"c": 1}})).unwrap();
assert!(!source.jsonb_contains(&query));
let query = indexer.index(json!({"a": {"b": 2}})).unwrap();
assert!(!source.jsonb_contains(&query));
}
// `[]` is contained by `[]` and by a non-empty array.
#[test]
fn every_array_contains_the_empty_array() {
let mut indexer = JsonbIndexer::new([0; 32].into(), vec![1; 32]);
let source = indexer.index(json!([])).unwrap();
let query = indexer.index(json!([])).unwrap();
assert!(source.jsonb_contains(&query));
let source = indexer.index(json!([123])).unwrap();
assert!(source.jsonb_contains(&query));
}
// Array containment ignores element order and duplicates in the query, but
// every queried element must be present in the source.
#[test]
fn simple_array_containment() {
let mut indexer = JsonbIndexer::new([0; 32].into(), vec![1; 32]);
let source = indexer.index(json!([1, 2, 3])).unwrap();
let query = indexer.index(json!([2, 1])).unwrap();
assert!(source.jsonb_contains(&query));
let source = indexer.index(json!([1, 2, 3])).unwrap();
let query = indexer.index(json!([2, 1, 5])).unwrap();
assert!(!source.jsonb_contains(&query));
let source = indexer.index(json!([1, 2, 3])).unwrap();
let query = indexer.index(json!([2, 2])).unwrap();
assert!(source.jsonb_contains(&query));
}
// A nested array in the query must be contained by a nested array of the
// source; elements of a nested source array do not count as top-level elements.
#[test]
fn nested_array_containment() {
let mut indexer = JsonbIndexer::new([0; 32].into(), vec![1; 32]);
let source = indexer.index(json!([1, 2, [3, 4]])).unwrap();
let query = indexer.index(json!([[3]])).unwrap();
assert!(source.jsonb_contains(&query));
let source = indexer.index(json!([1, 2, [3, 4]])).unwrap();
let query = indexer.index(json!([3])).unwrap();
assert!(!source.jsonb_contains(&query));
let query = indexer.index(json!([1, [4, 5]])).unwrap();
assert!(!source.jsonb_contains(&query));
}
// An object with mixed field types contains a query that names a subset of
// its fields and a subset of an array field's elements.
#[test]
fn more_complex() {
let mut indexer = JsonbIndexer::new([0; 32].into(), vec![1; 32]);
let indexed_json = indexer
.index(json!({
"name": "Alice",
"description": "Mystical cryptography persona",
"is_smart": true,
"likes": [
"apples",
"chocolate",
"cryptography"
]
}))
.unwrap();
let query = indexer
.index(json!({
"name": "Alice",
"likes": [
"chocolate",
]
}))
.unwrap();
assert!(indexed_json.jsonb_contains(&query))
}
// Primitives contain only an equal primitive of the same JSON type.
#[test]
fn primitive_containment() {
assert!(contains(json!(1), json!(1)));
assert!(contains(json!(true), json!(true)));
assert!(contains(json!(null), json!(null)));
assert!(contains(json!("hello"), json!("hello")));
assert!(!contains(json!(1), json!(2)));
assert!(!contains(json!(true), json!(false)));
assert!(!contains(json!("foo"), json!("bar")));
assert!(!contains(json!(null), json!(false)));
assert!(!contains(json!(null), json!("foo")));
assert!(!contains(json!(null), json!(1)));
assert!(!contains(json!(true), json!("foo")));
assert!(!contains(json!(true), json!(1)));
}
// Empty containers do not contain a container of the other kind or a primitive.
#[test]
fn non_primitive_contains() {
assert!(!contains(json!({}), json!([])));
assert!(!contains(json!([]), json!({})));
assert!(!contains(json!([]), json!(1)));
assert!(!contains(json!({}), json!(1)));
}
// Helper: indexes both values with one indexer and reports whether `source`
// contains `query`.
fn contains(source: serde_json::Value, query: serde_json::Value) -> bool {
let mut indexer = JsonbIndexer::new([0; 32].into(), vec![1; 32]);
let source = indexer.index(source).unwrap();
let query = indexer.index(query).unwrap();
source.jsonb_contains(&query)
}
}