#[cfg(not(test))]
use alloc::vec::Vec;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use super::BitWriter;
// Byte values of the JSON syntax characters that the semi-index builder
// compares each input byte against.
/// `"` — toggles between the in-JSON and in-string scanner states.
const DOUBLE_QUOTE: u8 = b'"';
/// `\` — inside a string, marks the following byte as escaped.
const BACKSLASH: u8 = b'\\';
/// `{` — treated as an opening bracket (see `is_open`).
const OPEN_BRACE: u8 = b'{';
/// `}` — treated as a closing bracket (see `is_close`).
const CLOSE_BRACE: u8 = b'}';
/// `[` — treated as an opening bracket (see `is_open`).
const OPEN_BRACKET: u8 = b'[';
/// `]` — treated as a closing bracket (see `is_close`).
const CLOSE_BRACKET: u8 = b']';
/// `,` — treated as a delimiter (see `is_delim`).
const COMMA: u8 = b',';
/// `:` — treated as a delimiter (see `is_delim`).
const COLON: u8 = b':';
/// Scanner state used by `build_semi_index` while walking the input bytes.
///
/// The state after the last byte is stored in `SemiIndex::state`, so a caller
/// can tell whether the input ended inside a string or mid-escape.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum State {
/// Outside any string literal; brackets, commas and colons are structural here.
InJson,
/// Inside a string literal (after an opening `"`); only `"` and `\` are special.
InString,
/// Inside a string, immediately after a `\`; the next byte is skipped
/// without inspection and the scanner returns to `InString`.
InEscape,
}
/// Result of `build_semi_index`: two bit vectors packed into `u64` words plus
/// the scanner state reached at the end of the input.
///
/// NOTE(review): the bit order inside each word is decided by `BitWriter`,
/// which is not defined in this file; the tests here read bit `i` as bit
/// `i % 64` of word `i / 64` — confirm against `BitWriter`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SemiIndex {
/// Scanner state after the final input byte. `from_bytes` does not recover
/// this value and always sets it to `State::InJson`.
pub state: State,
/// One bit per input byte (presumably "interest bits"): set for `{ } [ ] , :`
/// seen outside a string, clear for every other byte.
pub ib: Vec<u64>,
/// Two bits per structural byte (presumably "balanced parentheses"), written
/// in this order: open bracket `1,1`; close bracket `0,0`; comma/colon `0,1`.
pub bp: Vec<u64>,
}
impl SemiIndex {
    /// Returns the `ib` words reinterpreted as bytes by
    /// `crate::binary::words_to_bytes`.
    ///
    /// NOTE(review): byte order within each word is whatever that helper
    /// produces — confirm there before persisting the result.
    #[inline]
    pub fn ib_as_bytes(&self) -> &[u8] {
        let Self { ib, .. } = self;
        crate::binary::words_to_bytes(ib)
    }

    /// Returns the `bp` words reinterpreted as bytes by
    /// `crate::binary::words_to_bytes`.
    #[inline]
    pub fn bp_as_bytes(&self) -> &[u8] {
        let Self { bp, .. } = self;
        crate::binary::words_to_bytes(bp)
    }

    /// Rebuilds an index from two byte buffers, converting each with
    /// `crate::binary::bytes_to_words_vec`.
    ///
    /// The scanner state is not part of the byte representation, so the
    /// returned index always reports `State::InJson`.
    pub fn from_bytes(ib_bytes: &[u8], bp_bytes: &[u8]) -> Self {
        let ib = crate::binary::bytes_to_words_vec(ib_bytes);
        let bp = crate::binary::bytes_to_words_vec(bp_bytes);
        Self {
            state: State::InJson,
            ib,
            bp,
        }
    }
}
/// Returns `true` when `c` is `[` or `{`.
#[inline]
fn is_open(c: u8) -> bool {
    [OPEN_BRACKET, OPEN_BRACE].contains(&c)
}
/// Returns `true` when `c` is `]` or `}`.
#[inline]
fn is_close(c: u8) -> bool {
    [CLOSE_BRACKET, CLOSE_BRACE].contains(&c)
}
/// Returns `true` when `c` is `,` or `:`.
#[inline]
fn is_delim(c: u8) -> bool {
    [COMMA, COLON].contains(&c)
}
/// Scans `json` once and builds its [`SemiIndex`].
///
/// For every input byte exactly one bit is appended to `ib`:
/// `1` for a bracket, comma or colon seen outside a string, `0` otherwise.
/// For each of those structural bytes two bits are appended to `bp`:
///
/// | byte        | `bp` bits (in write order) |
/// |-------------|----------------------------|
/// | `{` or `[`  | `1`, `1`                   |
/// | `}` or `]`  | `0`, `0`                   |
/// | `,` or `:`  | `0`, `1`                   |
///
/// Bytes inside string literals never produce `bp` bits, and a byte that
/// follows a backslash inside a string is skipped without being inspected.
///
/// No validation is performed: unbalanced brackets or an unterminated string
/// are indexed as-is. The scanner state after the last byte is returned in
/// `SemiIndex::state`.
pub fn build_semi_index(json: &[u8]) -> SemiIndex {
    // One `ib` bit per byte and at most two `bp` bits per byte; the capacity
    // is presumably counted in 64-bit words (per the variable naming).
    let ib_words = json.len().div_ceil(64);
    let mut interest = BitWriter::with_capacity(ib_words);
    let mut parens = BitWriter::with_capacity(ib_words * 2);

    let mut cursor = State::InJson;
    for &byte in json {
        // Each arm emits the bits for `byte` and yields the next state.
        cursor = match cursor {
            State::InJson => {
                if is_open(byte) {
                    interest.write_1();
                    parens.write_1();
                    parens.write_1();
                    State::InJson
                } else if is_close(byte) {
                    interest.write_1();
                    parens.write_0();
                    parens.write_0();
                    State::InJson
                } else if is_delim(byte) {
                    interest.write_1();
                    parens.write_0();
                    parens.write_1();
                    State::InJson
                } else {
                    // Not structural: scalars, whitespace, or an opening quote.
                    interest.write_0();
                    if byte == DOUBLE_QUOTE {
                        State::InString
                    } else {
                        State::InJson
                    }
                }
            }
            State::InString => {
                interest.write_0();
                if byte == DOUBLE_QUOTE {
                    State::InJson
                } else if byte == BACKSLASH {
                    State::InEscape
                } else {
                    State::InString
                }
            }
            State::InEscape => {
                // The escaped byte is consumed blindly, so `\"` cannot end the string.
                interest.write_0();
                State::InString
            }
        };
    }

    SemiIndex {
        state: cursor,
        ib: interest.finish(),
        bp: parens.finish(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads bit `i` of `words`, taking bit `i % 64` of word `i / 64`.
    /// Positions past the end of `words` read as unset.
    fn get_bit(words: &[u64], i: usize) -> bool {
        words
            .get(i / 64)
            .is_some_and(|&word| (word >> (i % 64)) & 1 == 1)
    }

    /// Renders the first `n` bits of `words` as a string of `'0'`/`'1'`.
    fn bits_to_string(words: &[u64], n: usize) -> String {
        let mut rendered = String::with_capacity(n);
        for i in 0..n {
            rendered.push(if get_bit(words, i) { '1' } else { '0' });
        }
        rendered
    }

    #[test]
    fn test_empty_object() {
        let index = build_semi_index(b"{}");
        assert_eq!(bits_to_string(&index.ib, 2), "11");
        assert_eq!(bits_to_string(&index.bp, 4), "1100");
        assert_eq!(index.state, State::InJson);
    }

    #[test]
    fn test_empty_array() {
        let index = build_semi_index(b"[]");
        assert_eq!(bits_to_string(&index.ib, 2), "11");
        assert_eq!(bits_to_string(&index.bp, 4), "1100");
    }

    #[test]
    fn test_simple_string() {
        // Quotes and string contents are not structural; only `{`, `:` and `}` are.
        let index = build_semi_index(br#"{"a":"b"}"#);
        assert_eq!(bits_to_string(&index.ib, 9), "100010001");
        assert_eq!(bits_to_string(&index.bp, 6), "110100");
    }

    #[test]
    fn test_array_with_values() {
        let index = build_semi_index(b"[1,2,3]");
        assert_eq!(bits_to_string(&index.ib, 7), "1010101");
        assert_eq!(bits_to_string(&index.bp, 8), "11010100");
    }

    #[test]
    fn test_nested_object() {
        let index = build_semi_index(br#"{"a":{"b":1}}"#);
        assert_eq!(bits_to_string(&index.ib, 13), "1000110001011");
    }

    #[test]
    fn test_escaped_quote() {
        // The `\"` must not terminate the string, so nothing is structural.
        let index = build_semi_index(br#""a\"b""#);
        assert_eq!(bits_to_string(&index.ib, 6), "000000");
        assert_eq!(index.bp.first().copied().unwrap_or(0), 0);
    }

    #[test]
    fn test_escaped_backslash() {
        // `\\` is a complete escape, so the final quote closes the string.
        let index = build_semi_index(br#""a\\b""#);
        assert_eq!(bits_to_string(&index.ib, 6), "000000");
        assert_eq!(index.state, State::InJson);
    }

    #[test]
    fn test_whitespace_ignored() {
        let index = build_semi_index(b"{ \"a\" : 1 }");
        assert_eq!(bits_to_string(&index.ib, 11), "10000010001");
    }

    #[test]
    fn test_final_state_in_string() {
        let index = build_semi_index(br#"{"a"#);
        assert_eq!(index.state, State::InString);
    }

    #[test]
    fn test_final_state_in_escape() {
        let index = build_semi_index(br#""\"#);
        assert_eq!(index.state, State::InEscape);
    }
}