use serde_json::Value;
use crate::read::{shape_hit, ReadShape};
/// Bundles the arguments that `HitShaper::transform` forwards to `shape_hit`
/// for every element it rewrites.
pub(crate) struct HitShaper {
/// Passed to `shape_hit` as its second argument.
/// NOTE(review): presumably the index name exposed to clients — confirm against `shape_hit`.
pub logical_index: String,
/// Passed to `shape_hit` as its third argument.
/// NOTE(review): exact meaning is defined by `shape_hit`; not visible from this file.
pub partition: String,
/// Passed to `shape_hit` as its fourth argument; selects how the hit is shaped.
pub shape: ReadShape,
}
impl HitShaper {
    /// Rewrites one serialized JSON value with `shape_hit`.
    ///
    /// `raw` is parsed as JSON, handed to `shape_hit` together with this
    /// shaper's fields, and serialized again. If `raw` does not parse, or the
    /// shaped value cannot be serialized, the input bytes are returned unchanged.
    fn transform(&self, raw: &[u8]) -> Vec<u8> {
        // Unparseable input is passed through untouched rather than dropped.
        let Ok(mut doc) = serde_json::from_slice::<Value>(raw) else {
            return raw.to_vec();
        };
        shape_hit(&mut doc, &self.logical_index, &self.partition, &self.shape);
        match serde_json::to_vec(&doc) {
            Ok(encoded) => encoded,
            Err(_) => raw.to_vec(),
        }
    }
}
/// Scanner state: what `SearchHitsScanner` expects the next input byte to be.
/// Each variant has one handler that `SearchHitsScanner::step` dispatches to.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Phase {
/// Before the root value: `{` enters the root object, any other
/// non-whitespace byte switches to `Passthrough`.
SeekRoot,
/// Inside an object, waiting for the opening `"` of the next key.
ObjExpectKey,
/// Inside a key string, collecting its raw bytes until the closing `"`.
ObjReadKey,
/// After a key, waiting for `:`.
ObjExpectColon,
/// After `:`, waiting for the first non-whitespace byte of the value.
ObjExpectValue,
/// Copying a member value that is not rewritten, tracked by `Skip`.
SkipValue,
/// After a member value, waiting for `,`; any other non-whitespace byte
/// (including `}`) switches to `Passthrough`.
ObjExpectComma,
/// Inside the target array, waiting for the first byte of an element or `]`.
ArrExpectElem,
/// Buffering the bytes of one array element in `Elem`.
ArrReadElem,
/// After an array element, waiting for `,`; any other non-whitespace byte
/// (including `]`) switches to `Passthrough`.
ArrExpectComma,
/// Terminal state: every remaining byte is copied to the output unchanged.
Passthrough,
}
/// What a phase handler tells `SearchHitsScanner::step` to do with the current byte.
enum Flow {
/// The byte has been handled; `step` returns.
Consume,
/// The handler only changed phase; `step` dispatches the same byte again
/// to the handler of the new phase.
Redo,
}
/// Progress through an object member value that is copied without being rewritten
/// (see `SearchHitsScanner::at_skip_value`).
#[derive(Default)]
struct Skip {
/// Number of `{` / `[` currently open inside the value; 0 for strings and scalars.
depth: u32,
/// True while the scanner is inside a string literal.
in_str: bool,
/// True when the previous byte inside a string was an unescaped backslash,
/// so the current byte must not be interpreted.
esc: bool,
/// True when the value is a bare scalar (its first byte was not `"`, `{` or `[`).
scalar: bool,
}
impl Skip {
    /// Builds the tracking state for a value whose first byte is `first`.
    ///
    /// * `"` starts a string (`in_str` set),
    /// * `{` or `[` starts a container (`depth` set to 1),
    /// * anything else is treated as a bare scalar (`scalar` set).
    ///
    /// All other fields keep their default (zero / false) value.
    fn begin(first: u8) -> Self {
        let mut state = Self::default();
        match first {
            b'"' => state.in_str = true,
            b'{' | b'[' => state.depth = 1,
            _ => state.scalar = true,
        }
        state
    }
}
/// Classification of the array element being buffered, decided from its first byte
/// in `SearchHitsScanner::read_elem`.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ElemKind {
/// No byte of the element has been seen yet.
Unknown,
/// The element started with `"`; it ends at the closing unescaped `"`.
Str,
/// The element started with `{` or `[`; it ends when the nesting depth returns to 0.
Struct,
/// Any other first byte; the element ends before whitespace, `,` or `]`.
Scalar,
}
/// Buffer and lexical state for the array element currently being collected.
struct Elem {
/// Raw bytes of the element received so far.
buf: Vec<u8>,
/// How the end of the element is detected.
kind: ElemKind,
/// Number of `{` / `[` currently open; only meaningful for `ElemKind::Struct`.
depth: u32,
/// True while the scanner is inside a string literal.
in_str: bool,
/// True when the previous byte inside a string was an unescaped backslash.
esc: bool,
}
impl Elem {
    /// Returns an empty element state: no bytes, kind `Unknown`, depth 0,
    /// not inside a string, no pending escape.
    fn new() -> Self {
        Self {
            kind: ElemKind::Unknown,
            buf: Vec::new(),
            depth: 0,
            in_str: false,
            esc: false,
        }
    }

    /// Returns this state to what `new` produces while keeping the byte
    /// buffer's allocation for reuse by the next element.
    fn reset(&mut self) {
        let mut recycled = std::mem::take(&mut self.buf);
        recycled.clear();
        *self = Self {
            buf: recycled,
            ..Self::new()
        };
    }
}
/// Incremental scanner over a JSON document supplied in chunks.
///
/// It copies input bytes to its output unchanged, except for the elements of
/// the array found under the key `hits` of the object found under the root
/// key `hits`; each of those elements is buffered and replaced by the result
/// of `HitShaper::transform`.
pub(crate) struct SearchHitsScanner {
/// Rewrites each completed array element.
shaper: HitShaper,
/// Current state of the scanner.
phase: Phase,
/// Object level being walked: 0 before the root, 1 inside the root object,
/// 2 inside the root's `hits` object.
level: u8,
/// Whether the most recently completed key decodes to `hits`.
key_is_hits: bool,
/// Raw (still escaped) bytes of the key currently being read.
key_buf: Vec<u8>,
/// True when the previous key byte was an unescaped backslash.
key_esc: bool,
/// State for the member value currently being copied without rewriting.
skip: Skip,
/// State for the array element currently being buffered.
elem: Elem,
/// Output bytes produced but not yet handed to the caller.
out: Vec<u8>,
}
impl SearchHitsScanner {
    /// Creates a scanner that has consumed no input and rewrites elements with `shaper`.
    pub(crate) fn new(shaper: HitShaper) -> Self {
        Self {
            shaper,
            phase: Phase::SeekRoot,
            level: 0,
            key_is_hits: false,
            key_buf: Vec::new(),
            key_esc: false,
            skip: Skip::default(),
            elem: Elem::new(),
            out: Vec::new(),
        }
    }

    /// Consumes the next chunk of the document and returns the output bytes
    /// that became available.
    ///
    /// Bytes belonging to an array element that is still incomplete at the end
    /// of `chunk` are held back until a later call completes the element (or
    /// until [`finish`](Self::finish) is called). Chunk boundaries may fall
    /// anywhere, including inside strings and escapes.
    pub(crate) fn feed(&mut self, chunk: &[u8]) -> Vec<u8> {
        for (idx, &b) in chunk.iter().enumerate() {
            if self.phase == Phase::Passthrough {
                // Nothing is left to rewrite: copy the remainder in one go.
                self.out.extend_from_slice(&chunk[idx..]);
                break;
            }
            self.step(b);
        }
        std::mem::take(&mut self.out)
    }

    /// Returns all output that has not been handed out yet; call after the last `feed`.
    ///
    /// If the input ended while an array element was still being buffered
    /// (for example a truncated body), the buffered bytes are emitted verbatim
    /// and unshaped so that no input is lost.
    pub(crate) fn finish(&mut self) -> Vec<u8> {
        if !self.elem.buf.is_empty() {
            // The element never completed, so it cannot be shaped; pass it through as-is.
            self.out.append(&mut self.elem.buf);
            self.phase = Phase::Passthrough;
        }
        std::mem::take(&mut self.out)
    }

    /// Processes one input byte, re-dispatching it for as long as a handler
    /// answers `Flow::Redo` after changing phase.
    fn step(&mut self, b: u8) {
        loop {
            let flow = match self.phase {
                Phase::SeekRoot => self.at_seek_root(b),
                Phase::ObjExpectKey => self.at_obj_expect_key(b),
                Phase::ObjReadKey => self.at_obj_read_key(b),
                Phase::ObjExpectColon => self.at_obj_expect_colon(b),
                Phase::ObjExpectValue => self.at_obj_expect_value(b),
                Phase::SkipValue => self.at_skip_value(b),
                Phase::ObjExpectComma => self.at_obj_expect_comma(b),
                Phase::ArrExpectElem => self.at_arr_expect_elem(b),
                Phase::ArrReadElem => self.read_elem(b),
                Phase::ArrExpectComma => self.at_arr_expect_comma(b),
                // `feed` short-circuits before calling `step` in this phase;
                // the arm keeps the match exhaustive and still preserves the byte.
                Phase::Passthrough => {
                    self.out.push(b);
                    Flow::Consume
                }
            };
            if matches!(flow, Flow::Consume) {
                return;
            }
        }
    }

    /// `SeekRoot`: copies the byte; `{` enters the root object, any other
    /// non-whitespace byte means the root is not an object, so give up rewriting.
    fn at_seek_root(&mut self, b: u8) -> Flow {
        self.out.push(b);
        if b == b'{' {
            self.level = 1;
            self.phase = Phase::ObjExpectKey;
        } else if !is_ws(b) {
            self.phase = Phase::Passthrough;
        }
        Flow::Consume
    }

    /// `ObjExpectKey`: copies the byte; `"` starts a key, any other
    /// non-whitespace byte (such as `}`) switches to passthrough.
    fn at_obj_expect_key(&mut self, b: u8) -> Flow {
        self.out.push(b);
        if b == b'"' {
            self.key_buf.clear();
            self.key_esc = false;
            self.phase = Phase::ObjReadKey;
        } else if !is_ws(b) {
            self.phase = Phase::Passthrough;
        }
        Flow::Consume
    }

    /// `ObjReadKey`: copies the byte and records it in `key_buf`; at the
    /// closing unescaped `"` decides whether the key is `hits`.
    fn at_obj_read_key(&mut self, b: u8) -> Flow {
        self.out.push(b);
        if self.key_esc {
            // Byte following a backslash: part of the escape, never a terminator.
            self.key_esc = false;
            self.key_buf.push(b);
        } else if b == b'\\' {
            self.key_esc = true;
            self.key_buf.push(b);
        } else if b == b'"' {
            self.key_is_hits = decoded_key_is_hits(&self.key_buf);
            self.phase = Phase::ObjExpectColon;
        } else {
            self.key_buf.push(b);
        }
        Flow::Consume
    }

    /// `ObjExpectColon`: copies the byte; `:` moves on to the value, any other
    /// non-whitespace byte switches to passthrough.
    fn at_obj_expect_colon(&mut self, b: u8) -> Flow {
        self.out.push(b);
        if b == b':' {
            self.phase = Phase::ObjExpectValue;
        } else if !is_ws(b) {
            self.phase = Phase::Passthrough;
        }
        Flow::Consume
    }

    /// `ObjExpectValue`: copies the byte and, at the first non-whitespace byte,
    /// decides how the member value is handled.
    ///
    /// Under a `hits` key: `{` at level 1 descends into that object, `[` at
    /// level 2 starts the array whose elements are rewritten, and any other
    /// combination switches to passthrough. Under any other key the value is
    /// skipped (copied verbatim).
    fn at_obj_expect_value(&mut self, b: u8) -> Flow {
        self.out.push(b);
        if is_ws(b) {
            return Flow::Consume;
        }
        if self.key_is_hits {
            match (self.level, b) {
                (1, b'{') => {
                    self.level = 2;
                    self.phase = Phase::ObjExpectKey;
                }
                (2, b'[') => self.phase = Phase::ArrExpectElem,
                _ => self.phase = Phase::Passthrough,
            }
        } else {
            self.skip = Skip::begin(b);
            self.phase = Phase::SkipValue;
        }
        Flow::Consume
    }

    /// `SkipValue`: copies the bytes of a value that is not rewritten and
    /// detects where it ends.
    fn at_skip_value(&mut self, b: u8) -> Flow {
        if self.skip.in_str {
            self.out.push(b);
            if self.skip.esc {
                self.skip.esc = false;
            } else if b == b'\\' {
                self.skip.esc = true;
            } else if b == b'"' {
                self.skip.in_str = false;
                // A top-level string value ends here; a string nested in a
                // container just returns to container scanning.
                if self.skip.depth == 0 {
                    self.phase = Phase::ObjExpectComma;
                }
            }
            return Flow::Consume;
        }
        if self.skip.depth > 0 {
            self.out.push(b);
            match b {
                b'"' => self.skip.in_str = true,
                b'{' | b'[' => self.skip.depth += 1,
                b'}' | b']' => {
                    self.skip.depth -= 1;
                    if self.skip.depth == 0 {
                        self.phase = Phase::ObjExpectComma;
                    }
                }
                _ => {}
            }
            return Flow::Consume;
        }
        debug_assert!(self.skip.scalar);
        // A bare scalar has no closing delimiter: it ends just before the
        // first whitespace, `,` or `}`, which the next phase must still see.
        if is_ws(b) || b == b',' || b == b'}' {
            self.phase = Phase::ObjExpectComma;
            return Flow::Redo;
        }
        self.out.push(b);
        Flow::Consume
    }

    /// `ObjExpectComma`: copies the byte; `,` continues with the next key, any
    /// other non-whitespace byte (including the object's `}`) switches to passthrough.
    fn at_obj_expect_comma(&mut self, b: u8) -> Flow {
        self.out.push(b);
        if b == b',' {
            self.phase = Phase::ObjExpectKey;
        } else if !is_ws(b) {
            self.phase = Phase::Passthrough;
        }
        Flow::Consume
    }

    /// `ArrExpectElem`: copies whitespace and a closing `]` (which ends
    /// rewriting); any other byte starts a new buffered element and is
    /// re-dispatched to `read_elem`.
    fn at_arr_expect_elem(&mut self, b: u8) -> Flow {
        if is_ws(b) {
            self.out.push(b);
            return Flow::Consume;
        }
        if b == b']' {
            self.out.push(b);
            self.phase = Phase::Passthrough;
            return Flow::Consume;
        }
        self.elem.reset();
        self.phase = Phase::ArrReadElem;
        Flow::Redo
    }

    /// `ArrReadElem`: appends the byte to the element buffer and, once the
    /// element is complete, emits its shaped form via `finish_elem`.
    fn read_elem(&mut self, b: u8) -> Flow {
        match self.elem.kind {
            ElemKind::Unknown => {
                // First byte of the element decides how its end is recognised.
                self.elem.buf.push(b);
                self.elem.kind = match b {
                    b'"' => {
                        self.elem.in_str = true;
                        ElemKind::Str
                    }
                    b'{' | b'[' => {
                        self.elem.depth = 1;
                        ElemKind::Struct
                    }
                    _ => ElemKind::Scalar,
                };
                Flow::Consume
            }
            ElemKind::Str => {
                self.elem.buf.push(b);
                if self.elem.esc {
                    self.elem.esc = false;
                } else if b == b'\\' {
                    self.elem.esc = true;
                } else if b == b'"' {
                    self.finish_elem();
                    self.phase = Phase::ArrExpectComma;
                }
                Flow::Consume
            }
            ElemKind::Struct => {
                self.elem.buf.push(b);
                self.read_struct_elem_byte(b);
                Flow::Consume
            }
            ElemKind::Scalar => {
                if is_ws(b) || b == b',' || b == b']' {
                    // The terminator is not part of the scalar; let the next
                    // phase handle (and copy) it.
                    self.finish_elem();
                    self.phase = Phase::ArrExpectComma;
                    Flow::Redo
                } else {
                    self.elem.buf.push(b);
                    Flow::Consume
                }
            }
        }
    }

    /// Updates string/nesting state for a byte already appended to a
    /// container element and finishes the element when its outermost
    /// bracket closes.
    fn read_struct_elem_byte(&mut self, b: u8) {
        if self.elem.in_str {
            if self.elem.esc {
                self.elem.esc = false;
            } else if b == b'\\' {
                self.elem.esc = true;
            } else if b == b'"' {
                self.elem.in_str = false;
            }
            return;
        }
        match b {
            b'"' => self.elem.in_str = true,
            b'{' | b'[' => self.elem.depth += 1,
            b'}' | b']' => {
                self.elem.depth -= 1;
                if self.elem.depth == 0 {
                    self.finish_elem();
                    self.phase = Phase::ArrExpectComma;
                }
            }
            _ => {}
        }
    }

    /// `ArrExpectComma`: copies the byte; `,` continues with the next element,
    /// any other non-whitespace byte (including the array's `]`) switches to passthrough.
    fn at_arr_expect_comma(&mut self, b: u8) -> Flow {
        self.out.push(b);
        if b == b',' {
            self.phase = Phase::ArrExpectElem;
        } else if !is_ws(b) {
            self.phase = Phase::Passthrough;
        }
        Flow::Consume
    }

    /// Runs the buffered element through the shaper, appends the result to
    /// the output and empties the element buffer.
    fn finish_elem(&mut self) {
        let shaped = self.shaper.transform(&self.elem.buf);
        self.out.extend_from_slice(&shaped);
        self.elem.buf.clear();
    }
}
/// Reports whether `b` is one of the four JSON whitespace bytes:
/// space, horizontal tab, line feed or carriage return.
fn is_ws(b: u8) -> bool {
    [b' ', b'\t', b'\n', b'\r'].contains(&b)
}
/// Reports whether an object key decodes to the string `hits`.
///
/// `raw` is the key's content as it appeared between the quotes, escapes
/// included. It is wrapped in quotes again and decoded as a JSON string, so
/// an escaped spelling such as `\u0068its` also matches. Content that is not
/// a valid JSON string yields `false`.
fn decoded_key_is_hits(raw: &[u8]) -> bool {
    const QUOTE: &[u8] = b"\"";
    let quoted = [QUOTE, raw, QUOTE].concat();
    match serde_json::from_slice::<String>(&quoted) {
        Ok(key) => key == "hits",
        Err(_) => false,
    }
}
// Unit tests are compiled only for test builds and live in the sibling file
// named by the `#[path]` attribute below.
#[cfg(test)]
#[path = "search_scan_tests.rs"]
mod tests;