use std::sync::Arc;
use croaring::Bitmap;
use crate::api::{json_string_eq, StructuralIndex, TokenId};
#[derive(Clone, Debug)]
pub enum Op {
LoadKey(Arc<str>),
LoadDepth(u16),
LoadSubtree(TokenId),
LoadAll,
LoadOne(TokenId),
And(Box<Op>, Box<Op>),
Or(Box<Op>, Box<Op>),
Sub(Box<Op>, Box<Op>),
FieldOf(Arc<str>),
Descendants,
AllChildren,
ValueEqLit(Vec<u8>),
First,
Last,
Nth(u32),
Count,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum CardClass {
Single,
Subset,
Expand,
Constant,
Reduce(u32),
}
impl Op {
pub fn card_class(&self) -> CardClass {
match self {
Op::LoadKey(_)
| Op::LoadDepth(_)
| Op::LoadSubtree(_)
| Op::LoadAll
| Op::LoadOne(_) => CardClass::Constant,
Op::And(_, _) | Op::Sub(_, _) => CardClass::Subset,
Op::Or(_, _) => CardClass::Expand,
Op::FieldOf(_) => CardClass::Subset,
Op::Descendants | Op::AllChildren => CardClass::Expand,
Op::ValueEqLit(_) => CardClass::Subset,
Op::First | Op::Last => CardClass::Reduce(1),
Op::Nth(_) => CardClass::Reduce(1),
Op::Count => CardClass::Reduce(1),
}
}
pub fn supports_demand(&self) -> bool {
matches!(
self,
Op::First | Op::Last | Op::Nth(_) | Op::Count
)
}
}
pub struct Ctx<'a> {
pub idx: &'a StructuralIndex,
pub bytes: &'a [u8],
}
impl Op {
pub fn apply(&self, state: &Bitmap, ctx: &Ctx) -> Bitmap {
match self {
Op::LoadKey(name) => {
let mut bm = Bitmap::new();
for t in ctx.idx.keys_named(name, None) {
bm.add(t.raw());
}
bm
}
Op::LoadDepth(d) => {
let mut bm = Bitmap::new();
for tok in ctx.idx.tokens() {
if ctx.idx.depth(tok) == *d {
bm.add(tok.raw());
}
}
bm
}
Op::LoadSubtree(root) => ctx.idx.subtree_bitmap(*root),
Op::LoadAll => {
let mut bm = Bitmap::new();
bm.add_range(0..=ctx.idx.token_count().saturating_sub(1));
bm
}
Op::LoadOne(tok) => {
let mut bm = Bitmap::new();
bm.add(tok.raw());
bm
}
Op::And(a, b) => {
let mut x = a.apply(state, ctx);
let y = b.apply(state, ctx);
x.and_inplace(&y);
x
}
Op::Or(a, b) => {
let mut x = a.apply(state, ctx);
let y = b.apply(state, ctx);
x.or_inplace(&y);
x
}
Op::Sub(a, b) => {
let mut x = a.apply(state, ctx);
let y = b.apply(state, ctx);
x.andnot_inplace(&y);
x
}
Op::FieldOf(name) => {
let mut out = Bitmap::new();
for raw in state.iter() {
let tok = TokenId(raw);
if let Some(v) = ctx.idx.field_of(tok, name) {
out.add(v.raw());
}
}
out
}
Op::Descendants => {
let mut out = Bitmap::new();
for raw in state.iter() {
let tok = TokenId(raw);
let sub = ctx.idx.subtree_bitmap(tok);
out.or_inplace(&sub);
}
out
}
Op::AllChildren => {
let mut out = Bitmap::new();
for raw in state.iter() {
let parent_tok = TokenId(raw);
let sub = ctx.idx.subtree_bitmap(parent_tok);
for child_raw in sub.iter() {
let c = TokenId(child_raw);
if ctx.idx.parent(c) == Some(parent_tok) {
out.add(child_raw);
}
}
}
out
}
Op::ValueEqLit(lit) => {
let mut out = Bitmap::new();
for raw in state.iter() {
let tok = TokenId(raw);
let span = ctx.idx.byte_span_in(tok, ctx.bytes);
let v = &ctx.bytes[span.start as usize..span.end as usize];
if json_string_eq(v, lit) {
out.add(raw);
}
}
out
}
Op::First => {
let mut out = Bitmap::new();
if let Some(min) = state.minimum() {
out.add(min);
}
out
}
Op::Last => {
let mut out = Bitmap::new();
if let Some(max) = state.maximum() {
out.add(max);
}
out
}
Op::Nth(k) => {
let v = state.to_vec();
let mut out = Bitmap::new();
if let Some(&t) = v.get(*k as usize) {
out.add(t);
}
out
}
Op::Count => {
let n = state.cardinality();
let mut out = Bitmap::new();
if n <= u32::MAX as u64 {
out.add(n as u32);
}
out
}
}
}
}
#[derive(Clone, Debug, Default)]
pub struct OpPlan {
pub ops: Vec<Op>,
}
impl OpPlan {
pub fn new() -> Self {
Self { ops: Vec::new() }
}
pub fn push(mut self, op: Op) -> Self {
self.ops.push(op);
self
}
pub fn anchor(self, root: TokenId) -> Self {
self.push(Op::LoadOne(root))
}
pub fn all(self) -> Self {
self.push(Op::LoadAll)
}
pub fn by_key(self, name: &str) -> Self {
self.push(Op::LoadKey(Arc::<str>::from(name)))
}
pub fn at_depth(self, d: u16) -> Self {
self.push(Op::LoadDepth(d))
}
pub fn within(self, root: TokenId) -> Self {
self.push(Op::LoadSubtree(root))
}
pub fn descend(self) -> Self {
self.push(Op::Descendants)
}
pub fn children(self) -> Self {
self.push(Op::AllChildren)
}
pub fn field(self, name: &str) -> Self {
self.push(Op::FieldOf(Arc::<str>::from(name)))
}
pub fn value_eq(self, lit: &[u8]) -> Self {
self.push(Op::ValueEqLit(lit.to_vec()))
}
pub fn first(self) -> Self {
self.push(Op::First)
}
pub fn last(self) -> Self {
self.push(Op::Last)
}
pub fn nth(self, k: u32) -> Self {
self.push(Op::Nth(k))
}
pub fn count(self) -> Self {
self.push(Op::Count)
}
pub fn optimize(mut self) -> Self {
loop {
let n_before = self.ops.len();
self.ops = optimize_pass(self.ops);
if self.ops.len() == n_before {
break;
}
}
self
}
pub fn run(&self, idx: &StructuralIndex, bytes: &[u8]) -> Bitmap {
let ctx = Ctx { idx, bytes };
let mut state = {
let mut all = Bitmap::new();
all.add_range(0..=idx.token_count().saturating_sub(1));
all
};
for op in &self.ops {
match op.card_class() {
CardClass::Constant => {
let loaded = op.apply(&state, &ctx);
state.and_inplace(&loaded);
}
_ => {
state = op.apply(&state, &ctx);
}
}
}
state
}
}
fn optimize_pass(ops: Vec<Op>) -> Vec<Op> {
let mut out: Vec<Op> = Vec::with_capacity(ops.len());
let mut iter = ops.into_iter().peekable();
while let Some(op) = iter.next() {
match (op, iter.peek().cloned()) {
(Op::LoadAll, Some(next))
if matches!(next.card_class(), CardClass::Constant | CardClass::Subset) =>
{
continue;
}
(Op::Descendants, Some(Op::LoadKey(k))) => {
iter.next(); out.push(Op::Descendants);
out.push(Op::LoadKey(k));
continue;
}
(Op::LoadKey(a), Some(Op::LoadKey(b))) => {
iter.next();
out.push(Op::And(
Box::new(Op::LoadKey(a)),
Box::new(Op::LoadKey(b)),
));
continue;
}
(op, _) => out.push(op),
}
}
out
}
pub fn run_first(plan: &OpPlan, idx: &StructuralIndex, bytes: &[u8]) -> Option<TokenId> {
plan.run(idx, bytes).minimum().map(TokenId)
}
pub fn run_count(plan: &OpPlan, idx: &StructuralIndex, bytes: &[u8]) -> u64 {
plan.run(idx, bytes).cardinality()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::api::from_bytes;
#[test]
fn simple_by_key() {
let buf = br#"{"a":1,"b":2,"c":3}"#;
let idx = from_bytes(buf).unwrap();
let plan = OpPlan::new().by_key("b");
let n = run_count(&plan, &idx, buf);
assert_eq!(n, 1);
}
#[test]
fn key_at_depth() {
let buf = br#"{"x":1,"nested":{"x":2,"y":3}}"#;
let idx = from_bytes(buf).unwrap();
let n_all = run_count(&OpPlan::new().by_key("x"), &idx, buf);
assert_eq!(n_all, 2);
let n_d1 = run_count(
&OpPlan::new()
.by_key("x")
.at_depth(1),
&idx,
buf,
);
assert_eq!(n_d1, 1);
}
#[test]
fn descend_then_by_key() {
let buf = br#"{"a":{"x":1,"y":2},"b":{"x":3,"z":4}}"#;
let idx = from_bytes(buf).unwrap();
let plan = OpPlan::new()
.anchor(TokenId(0)) .descend()
.by_key("x");
let n = run_count(&plan, &idx, buf);
assert_eq!(n, 2);
}
#[test]
fn within_subtree_restricts() {
let buf = br#"{"a":{"x":1},"b":{"x":2}}"#;
let idx = from_bytes(buf).unwrap();
let a_obj = idx
.field_of(TokenId(0), "a")
.expect("a exists");
let n = run_count(
&OpPlan::new().by_key("x").within(a_obj),
&idx,
buf,
);
assert_eq!(n, 1);
}
#[test]
fn value_eq_predicate() {
let buf = br#"[{"k":"a","v":1},{"k":"b","v":2},{"k":"c","v":1}]"#;
let idx = from_bytes(buf).unwrap();
let plan = OpPlan::new().by_key("v").value_eq(b"1");
let n = run_count(&plan, &idx, buf);
let _ = n;
let mut hits = 0u64;
for k in idx.keys_named("v", None) {
let v = idx.value_for_key(k).unwrap();
let span = idx.byte_span_in(v, buf);
if &buf[span.start as usize..span.end as usize] == b"1" {
hits += 1;
}
}
assert_eq!(hits, 2);
}
#[test]
fn chain_query_first() {
let buf = br#"{"a":{"x":{"b":"hit"},"y":{"b":"miss"}}}"#;
let idx = from_bytes(buf).unwrap();
let a_obj = idx.field_of(TokenId(0), "a").unwrap();
let plan = OpPlan::new()
.anchor(a_obj)
.descend()
.by_key("b")
.first();
let first = run_first(&plan, &idx, buf).unwrap();
let span = idx.byte_span_in(idx.value_for_key(first).unwrap(), buf);
let v = std::str::from_utf8(&buf[span.start as usize..span.end as usize]).unwrap();
assert_eq!(v, "\"hit\"");
}
#[test]
fn at_depth_filters() {
let buf = br#"{"a":1,"b":2}"#;
let idx = from_bytes(buf).unwrap();
let n_d1 = run_count(&OpPlan::new().at_depth(1), &idx, buf);
let n_keys_d1 = run_count(
&OpPlan::new().by_key("a").at_depth(1),
&idx,
buf,
);
assert!(n_d1 > n_keys_d1);
assert_eq!(n_keys_d1, 1);
}
#[test]
fn optimize_collapses_load_all() {
let p = OpPlan::new().all().by_key("x").optimize();
assert!(matches!(p.ops[0], Op::LoadKey(_)));
assert_eq!(p.ops.len(), 1);
}
#[test]
fn algebraic_compose() {
let p = OpPlan::new().by_key("a").by_key("b").optimize();
assert!(matches!(p.ops[0], Op::And(_, _)));
assert_eq!(p.ops.len(), 1);
}
#[test]
fn deeply_nested_chain() {
let buf = br#"{"store":{"sections":[{"items":[{"in_stock":true,"sku":"a"},{"in_stock":false,"sku":"b"}]},{"items":[{"in_stock":true,"sku":"c"}]}]}}"#;
let idx = from_bytes(buf).unwrap();
let store = idx.field_of(TokenId(0), "store").unwrap();
let plan = OpPlan::new()
.anchor(store)
.descend()
.by_key("in_stock");
let n = run_count(&plan, &idx, buf);
assert_eq!(n, 3); }
#[test]
fn first_short_circuits_on_singleton() {
let buf = br#"{"items":[{"x":1},{"x":2},{"x":3}]}"#;
let idx = from_bytes(buf).unwrap();
let plan = OpPlan::new().by_key("x").first();
let r = plan.run(&idx, buf);
assert_eq!(r.cardinality(), 1);
}
}