use crate::state::context_map::ContextMap;
use crate::state::state_map::StateMap;
use crate::state::state_table::StateTable;
/// Coarse structural position inside a JSON document, as tracked byte by byte.
///
/// The discriminants are fixed because the value is cast to an integer when it
/// is mixed into context hashes and packed into the state byte.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
enum JsonState {
    /// Initial state; also restored when the nesting depth returns to zero.
    TopLevel = 0,
    /// Entered after `{` or `,`, and after the closing quote of a key string.
    ObjectKey = 1,
    /// Entered after `:`, after the closing quote of a non-key string, and
    /// after a closing bracket while still nested.
    ObjectValue = 2,
    /// Entered after `[`.
    ArrayValue = 3,
    /// Inside a string literal (between the opening and closing quotes).
    String = 4,
    /// Entered on a digit or `-` outside a string.
    Number = 5,
    /// Entered on `t`, `f` or `n` outside a string.
    Keyword = 6,
}
/// Bit predictor whose contexts are derived from a lightweight JSON state
/// tracker (structural state, key flag, nesting depth and a hash of the
/// current key).
pub struct JsonModel {
    /// Context map: looked up with the context hash in `predict` and written
    /// back in `update`.
    cmap: ContextMap,
    /// Turns the state read from `cmap` into a prediction; trained in `update`.
    smap: StateMap,

    /// Current structural state of the tracker.
    state: JsonState,
    /// Whether the most recently opened string was classified as an object key.
    in_key: bool,
    /// Nesting depth; raised on `{` / `[`, lowered on `}` / `]`, saturating at
    /// both ends of the `u8` range.
    depth: u8,
    /// Rolling hash of the bytes of the most recent key string; reset to zero
    /// when a new key string opens.
    key_hash: u32,
    /// Byte remembered from earlier input; consulted when a `"` opens a string
    /// to decide whether that string is a key.
    prev_byte: u8,
    /// Set when the previous byte inside a string was an unescaped backslash.
    escaped: bool,

    /// State returned by `cmap` in the latest `predict`, consumed by `update`.
    last_state: u8,
    /// Context hash computed in the latest `predict`, consumed by `update`.
    last_hash: u32,
}
impl JsonModel {
    /// Multiplier used by every hash-mixing step (the 32-bit FNV prime).
    const HASH_PRIME: u32 = 0x0100_0193;
    /// Seed of the context hash built in [`JsonModel::predict`].
    const HASH_SEED: u32 = 0xCAFE_BABE;
    /// Context-map size used by [`JsonModel::new`].
    const DEFAULT_CMAP_SIZE: usize = 1 << 23;

    /// Creates a model with the default context-map size (`1 << 23`).
    pub fn new() -> Self {
        Self::with_size(Self::DEFAULT_CMAP_SIZE)
    }

    /// Creates a model whose context map is built with
    /// `ContextMap::new(cmap_size)`. The tracker starts at top level, depth 0,
    /// with no key seen yet.
    pub fn with_size(cmap_size: usize) -> Self {
        JsonModel {
            cmap: ContextMap::new(cmap_size),
            smap: StateMap::new(),
            state: JsonState::TopLevel,
            in_key: false,
            depth: 0,
            key_hash: 0,
            prev_byte: 0,
            escaped: false,
            last_state: 0,
            last_hash: 0,
        }
    }

    /// Returns the prediction for the next bit.
    ///
    /// * `c0` - partial-byte context; only its low 8 bits are hashed.
    /// * `bpos` - bit position within the byte; the JSON tracker is advanced
    ///   only when this is `0`.
    /// * `c1` - byte fed to the JSON tracker when `bpos == 0` (presumably the
    ///   last completed byte; confirm against the caller).
    ///
    /// The looked-up state and the context hash are remembered so that
    /// [`JsonModel::update`] can train on the same slot. The return value is
    /// whatever `StateMap::predict` yields for that state.
    #[inline]
    pub fn predict(&mut self, c0: u32, bpos: u8, c1: u8) -> u32 {
        if bpos == 0 {
            self.update_json_state(c1);
        }

        let mut h = Self::HASH_SEED;
        h = Self::mix(h, self.state as u32);
        h = Self::mix(h, u32::from(self.in_key));
        // Depths beyond 3 share one context to limit context dilution.
        h = Self::mix(h, u32::from(self.depth.min(3)));
        h = Self::mix(h, c0 & 0xFF);
        // Values and string contents are additionally conditioned on the key.
        if matches!(self.state, JsonState::ObjectValue | JsonState::String) {
            h = Self::mix(h, self.key_hash);
        }

        let state = self.cmap.get(h);
        self.last_state = state;
        self.last_hash = h;
        self.smap.predict(state)
    }

    /// Trains the model with the bit that was actually coded.
    ///
    /// Uses the state and hash remembered by the latest
    /// [`JsonModel::predict`], so it must be called after `predict` for the
    /// same bit.
    #[inline]
    pub fn update(&mut self, bit: u8) {
        self.smap.update(self.last_state, bit);
        let new_state = StateTable::next(self.last_state, bit);
        self.cmap.set(self.last_hash, new_state);
    }

    /// Packs the tracker state into one byte: the low three bits hold the
    /// [`JsonState`] discriminant and bit 3 (value `8`) is the key flag.
    #[inline]
    pub fn json_state_byte(&self) -> u8 {
        let state_bits = self.state as u8 & 0x7;
        let key_bit = if self.in_key { 8 } else { 0 };
        state_bits | key_bit
    }

    /// One hash-mixing step: multiply by the prime, then xor in `value`.
    #[inline]
    fn mix(h: u32, value: u32) -> u32 {
        h.wrapping_mul(Self::HASH_PRIME) ^ value
    }

    /// Folds `c1` into the key hash when the current string is a key.
    #[inline]
    fn absorb_key_byte(&mut self, c1: u8) {
        if self.in_key {
            self.key_hash = Self::mix(self.key_hash, u32::from(c1));
        }
    }

    /// Advances the JSON tracker by one byte.
    ///
    /// Whitespace outside strings is not recorded in `prev_byte`, so the key
    /// test applied when a string opens sees the last significant byte. With
    /// this, the value in `"key": "value"` is no longer taken for a key.
    ///
    /// NOTE: the classification feeds the context hash, so encoder and decoder
    /// must both run the same version of this tracker.
    fn update_json_state(&mut self, c1: u8) {
        if self.state == JsonState::String {
            self.update_in_string(c1);
            return;
        }

        match c1 {
            b'{' => {
                self.state = JsonState::ObjectKey;
                self.depth = self.depth.saturating_add(1);
            }
            b'[' => {
                self.state = JsonState::ArrayValue;
                self.depth = self.depth.saturating_add(1);
            }
            b'}' | b']' => {
                self.depth = self.depth.saturating_sub(1);
                // NOTE(review): container types are not tracked, so any
                // enclosing container is treated as an object here.
                self.state = if self.depth > 0 {
                    JsonState::ObjectValue
                } else {
                    JsonState::TopLevel
                };
            }
            b'"' => {
                self.state = JsonState::String;
                self.in_key = matches!(self.prev_significant_context(), b'{' | b',');
                if self.in_key {
                    self.key_hash = 0;
                }
            }
            b':' => self.state = JsonState::ObjectValue,
            // NOTE(review): a comma inside an array also lands here, so array
            // elements after the first are treated as keys.
            b',' => self.state = JsonState::ObjectKey,
            b'0'..=b'9' | b'-' => self.state = JsonState::Number,
            // The in-string case returned above, so these always start (or
            // continue) a keyword.
            b't' | b'f' | b'n' => self.state = JsonState::Keyword,
            _ => {}
        }

        // Skip whitespace so `prev_byte` stays on the last significant byte.
        if !c1.is_ascii_whitespace() {
            self.prev_byte = c1;
        }
    }

    /// Handles one byte while inside a string literal: escape tracking, the
    /// closing quote, and key hashing. The backslash that starts an escape is
    /// not hashed; the escaped byte is.
    fn update_in_string(&mut self, c1: u8) {
        if self.escaped {
            self.escaped = false;
            self.absorb_key_byte(c1);
        } else {
            match c1 {
                b'\\' => self.escaped = true,
                b'"' => {
                    self.state = if self.in_key {
                        JsonState::ObjectKey
                    } else {
                        JsonState::ObjectValue
                    };
                }
                _ => self.absorb_key_byte(c1),
            }
        }
        self.prev_byte = c1;
    }

    /// Returns the last significant byte seen before the current one, i.e.
    /// the most recent byte outside a string that was not ASCII whitespace
    /// (or the closing quote of the last string). `0` before any input.
    #[inline]
    fn prev_significant_context(&self) -> u8 {
        self.prev_byte
    }
}
impl Default for JsonModel {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A model that has seen no data is expected to predict 2048.
    #[test]
    fn initial_prediction_balanced() {
        let mut model = JsonModel::new();
        assert_eq!(model.predict(1, 0, 0), 2048);
    }

    /// Every prediction made while walking a small document must stay within
    /// `1..=4095`.
    #[test]
    fn predictions_in_range() {
        let mut model = JsonModel::new();
        let sample = b"{\"name\":\"Alice\",\"age\":30}";
        for &byte in sample {
            for bpos in 0..8u8 {
                // The byte is handed to the tracker only on the first bit.
                let c1 = if bpos == 0 { byte } else { 0 };
                let p = model.predict(1, bpos, c1);
                assert!((1..=4095).contains(&p));
                let bit = (byte >> (7 - bpos)) & 1;
                model.update(bit);
            }
        }
    }

    /// Feeding an opening brace must move the tracker away from top level.
    #[test]
    fn json_state_changes() {
        let mut model = JsonModel::new();
        model.predict(1, 0, b'{');
        assert_ne!(model.state, JsonState::TopLevel);
    }
}