use succinctly::json::{locate::locate_offset, simple, standard, JsonIndex};
/// Reads bit `i` of the bit vector stored in `words`.
///
/// Bit `i` lives in word `i / 64` at position `i % 64`, counted from the
/// least significant bit. Indices past the end of `words` read as `false`.
fn get_bit(words: &[u64], i: usize) -> bool {
    match words.get(i / 64) {
        Some(&word) => (word >> (i % 64)) & 1 == 1,
        None => false,
    }
}
/// Renders the first `n` bits of `words` as a string of `'1'`/`'0'`
/// characters, bit 0 first.
///
/// Bits are read through `get_bit`, so positions beyond the end of `words`
/// are rendered as `'0'`.
fn bits_to_string(words: &[u64], n: usize) -> String {
    let mut rendered = String::with_capacity(n);
    for bit in 0..n {
        rendered.push(if get_bit(words, bit) { '1' } else { '0' });
    }
    rendered
}
/// Counts how many of the first `n` bits of `words` are set.
///
/// Bits are read through `get_bit`, so positions beyond the end of `words`
/// contribute nothing to the count.
fn count_ones(words: &[u64], n: usize) -> usize {
    (0..n)
        .map(|bit| usize::from(get_bit(words, bit)))
        .sum()
}
/// Tests for the simple cursor's semi-index builder (`simple::build_semi_index`).
mod simple_cursor {
    use super::*;

    /// `{}` and `[]` each set both IB bits and yield the BP bits `1100`.
    #[test]
    fn test_empty_json() {
        let semi = simple::build_semi_index(b"{}");
        assert_eq!(bits_to_string(&semi.ib, 2), "11");
        assert_eq!(bits_to_string(&semi.bp, 4), "1100");

        let semi = simple::build_semi_index(b"[]");
        assert_eq!(bits_to_string(&semi.ib, 2), "11");
        assert_eq!(bits_to_string(&semi.bp, 4), "1100");
    }

    /// Pins the number of IB bits set for a flat array and a one-pair object.
    #[test]
    fn test_simple_values() {
        let json = b"[1,2,3]";
        let semi = simple::build_semi_index(json);
        assert_eq!(count_ones(&semi.ib, json.len()), 4);

        let json = br#"{"key":"value"}"#;
        let semi = simple::build_semi_index(json);
        assert_eq!(count_ones(&semi.ib, json.len()), 3);
    }

    /// Nested objects emit at least one BP bit; nested empty arrays produce
    /// an all-ones IB and a fully nested BP sequence.
    #[test]
    fn test_nested_structures() {
        let semi = simple::build_semi_index(br#"{"a":{"b":{"c":1}}}"#);
        // Only checks that some BP bit was written for the nested object.
        assert!(semi.bp.iter().any(|&word| word != 0));

        let semi = simple::build_semi_index(b"[[[]]]");
        assert_eq!(bits_to_string(&semi.ib, 6), "111111");
        assert_eq!(bits_to_string(&semi.bp, 12), "111111000000");
    }

    /// A document mixing arrays and objects ends in the `InJson` state.
    #[test]
    fn test_mixed_structures() {
        let json = br#"{"arr":[1,2],"obj":{"x":3}}"#;
        let semi = simple::build_semi_index(json);
        assert_eq!(semi.state, simple::State::InJson);
    }

    /// Escaped quotes and backslashes inside a string must not end the
    /// string early: every input finishes in `InJson`.
    #[test]
    fn test_string_escapes() {
        // Escaped quote.
        let semi = simple::build_semi_index(br#"{"a":"b\"c"}"#);
        assert_eq!(semi.state, simple::State::InJson);

        // Escaped backslash.
        let semi = simple::build_semi_index(br#"{"a":"b\\c"}"#);
        assert_eq!(semi.state, simple::State::InJson);

        // Escaped backslash immediately followed by an escaped quote.
        let semi = simple::build_semi_index(br#"{"a":"b\\\"c"}"#);
        assert_eq!(semi.state, simple::State::InJson);
    }

    /// `\u` escapes inside strings leave the builder in `InJson`.
    #[test]
    fn test_unicode_in_strings() {
        // A JSON `\u` escape carries exactly four hex digits, so U+1F389
        // (outside the BMP) has to be written as a UTF-16 surrogate pair.
        let semi = simple::build_semi_index(br#"{"emoji":"\uD83C\uDF89"}"#);
        assert_eq!(semi.state, simple::State::InJson);

        let semi = simple::build_semi_index(br#"{"unicode":"\u0041"}"#);
        assert_eq!(semi.state, simple::State::InJson);
    }

    /// Inserting whitespace between tokens does not change the IB bit count.
    #[test]
    fn test_whitespace_variations() {
        let compact = br#"{"a":1}"#;
        let spaced = b"{ \"a\" : 1 }";

        let semi_compact = simple::build_semi_index(compact);
        let semi_spaced = simple::build_semi_index(spaced);

        assert_eq!(count_ones(&semi_compact.ib, compact.len()), 3);
        assert_eq!(count_ones(&semi_spaced.ib, spaced.len()), 3);
    }

    /// Truncated input reports the state the builder stopped in.
    #[test]
    fn test_unterminated_states() {
        // Input ends inside a string value.
        let semi = simple::build_semi_index(br#"{"key":"value"#);
        assert_eq!(semi.state, simple::State::InString);

        // Input ends right after a backslash inside a string.
        let semi = simple::build_semi_index(br#"{"key":"\"#);
        assert_eq!(semi.state, simple::State::InEscape);
    }

    /// A 1000-element integer array ends in `InJson` with 1001 IB bits set.
    #[test]
    fn test_large_json() {
        let body = (0..1000)
            .map(|i| i.to_string())
            .collect::<Vec<_>>()
            .join(",");
        let json = format!("[{body}]").into_bytes();

        let semi = simple::build_semi_index(&json);
        assert_eq!(semi.state, simple::State::InJson);
        assert_eq!(count_ones(&semi.ib, json.len()), 1001);
    }
}
/// Tests for the standard cursor's semi-index builder
/// (`standard::build_semi_index`).
mod standard_cursor {
    use super::*;

    /// Empty containers yield `10` for both the IB and the BP bits.
    #[test]
    fn test_empty_json() {
        for json in [b"{}", b"[]"] {
            let semi = standard::build_semi_index(json);
            assert_eq!(bits_to_string(&semi.ib, 2), "10");
            assert_eq!(bits_to_string(&semi.bp, 2), "10");
        }
    }

    /// Pins the exact IB and BP bit patterns for a flat numeric array.
    #[test]
    fn test_values_as_leaves() {
        let json = b"[1,2,3]";
        let semi = standard::build_semi_index(json);
        let ib = bits_to_string(&semi.ib, json.len());
        let bp = bits_to_string(&semi.bp, 8);
        assert_eq!(ib, "1101010");
        assert_eq!(bp, "11010100");
    }

    /// Pins the IB pattern for an object with one string key and value.
    #[test]
    fn test_string_values() {
        let json = br#"{"a":"b"}"#;
        let semi = standard::build_semi_index(json);
        assert_eq!(bits_to_string(&semi.ib, json.len()), "110001000");
    }

    /// `true`, `false` and `null` each set a single IB bit, on their first byte.
    #[test]
    fn test_boolean_null_values() {
        let cases: [(&[u8], &str); 3] = [
            (b"[true]", "110000"),
            (b"[false]", "1100000"),
            (b"[null]", "110000"),
        ];
        for (json, expected_ib) in cases {
            let semi = standard::build_semi_index(json);
            assert_eq!(bits_to_string(&semi.ib, json.len()), expected_ib);
        }
    }

    /// Signs, fractions and exponents stay inside one number: only the
    /// number's first byte is flagged in the IB.
    #[test]
    fn test_number_formats() {
        let cases: [(&[u8], &str); 4] = [
            (b"[-123]", "110000"),
            (b"[3.14]", "110000"),
            (b"[1e10]", "110000"),
            (b"[1e+10]", "1100000"),
        ];
        for (json, expected_ib) in cases {
            let semi = standard::build_semi_index(json);
            assert_eq!(bits_to_string(&semi.ib, json.len()), expected_ib);
        }
    }

    /// Pins the IB pattern for an object nested inside an object.
    #[test]
    fn test_nested_structures() {
        let json = br#"{"a":{"b":1}}"#;
        let semi = standard::build_semi_index(json);
        assert_eq!(bits_to_string(&semi.ib, json.len()), "1100011000100");
    }

    /// A number followed by `]`, `,` or a space leaves the builder in `InJson`.
    #[test]
    fn test_value_state_transitions() {
        assert_eq!(
            standard::build_semi_index(b"[123]").state,
            standard::State::InJson
        );
        assert_eq!(
            standard::build_semi_index(b"[1,2]").state,
            standard::State::InJson
        );
        assert_eq!(
            standard::build_semi_index(b"[123 ]").state,
            standard::State::InJson
        );
    }

    /// A realistic nested document ends in `InJson` with a non-empty BP.
    #[test]
    fn test_complex_json() {
        let semi = standard::build_semi_index(
            br#"{"items":[{"id":1,"name":"foo"},{"id":2,"name":"bar"}],"count":2}"#,
        );
        assert_eq!(semi.state, standard::State::InJson);
        assert!(!semi.bp.is_empty());
    }

    /// An array of 500 small objects ends in `InJson`.
    #[test]
    fn test_large_json() {
        let items = (0..500)
            .map(|i| format!(r#"{{"id":{},"value":"item{}"}}"#, i, i))
            .collect::<Vec<_>>()
            .join(",");

        let mut json = br#"{"items":["#.to_vec();
        json.extend_from_slice(items.as_bytes());
        json.extend_from_slice(br#"]}"#);

        let semi = standard::build_semi_index(&json);
        assert_eq!(semi.state, standard::State::InJson);
    }
}
/// Differential tests: the SIMD builder (`simd::build_semi_index_standard`)
/// must agree with the scalar `standard::build_semi_index` on every input.
#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))]
mod simd_comparison {
    use super::*;
    use rand::prelude::*;
    use succinctly::json::simd;

    /// Builds the semi-index for `json` with both implementations and asserts
    /// that the IB bits, the final state and the BP bits are identical.
    fn compare_results(json: &[u8]) {
        let scalar = standard::build_semi_index(json);
        let simd = simd::build_semi_index_standard(json);
        let shown = String::from_utf8_lossy(json);

        assert_eq!(
            bits_to_string(&scalar.ib, json.len()),
            bits_to_string(&simd.ib, json.len()),
            "IB mismatch for JSON: {:?}",
            shown
        );
        assert_eq!(
            scalar.state, simd.state,
            "State mismatch for JSON: {:?}",
            shown
        );

        // Compare over the LONGER of the two BP vectors. `get_bit` reads
        // missing words as zero, so a difference in trailing zero padding is
        // still tolerated, but set bits that exist in only one of the vectors
        // are no longer silently skipped.
        let bp_bits = scalar.bp.len().max(simd.bp.len()) * 64;
        assert_eq!(
            bits_to_string(&scalar.bp, bp_bits),
            bits_to_string(&simd.bp, bp_bits),
            "BP mismatch for JSON: {:?}",
            shown
        );
    }

    /// Empty and nested-empty containers.
    #[test]
    fn test_simd_empty_structures() {
        compare_results(b"{}");
        compare_results(b"[]");
        compare_results(b"[[]]");
        // Not valid JSON (an object cannot directly contain an object), but
        // both builders are still expected to agree on it.
        compare_results(b"{{}}");
    }

    /// Arrays of numbers and literals.
    #[test]
    fn test_simd_simple_values() {
        compare_results(b"[1]");
        compare_results(b"[1,2,3]");
        compare_results(b"[true,false,null]");
        // `+2` is not a valid JSON number; only agreement is checked.
        compare_results(b"[-1,+2,3.14]");
    }

    /// Plain string keys and values.
    #[test]
    fn test_simd_strings() {
        compare_results(br#"{"a":"b"}"#);
        compare_results(br#"["hello","world"]"#);
        compare_results(br#"{"key":"value with spaces"}"#);
    }

    /// Strings containing backslash escapes.
    #[test]
    fn test_simd_escapes() {
        compare_results(br#"{"a":"b\"c"}"#);
        compare_results(br#"{"a":"b\\c"}"#);
        compare_results(br#"{"a":"b\nc"}"#);
        compare_results(br#"{"a":"\\\"\\\"}"#);
    }

    /// Nested objects and arrays.
    #[test]
    fn test_simd_nested() {
        compare_results(br#"{"a":{"b":{"c":1}}}"#);
        compare_results(br#"[[[1,2],[3,4]],[[5,6]]]"#);
        compare_results(br#"{"a":[1,{"b":2}]}"#);
    }

    /// Whitespace between tokens, including newlines.
    #[test]
    fn test_simd_whitespace() {
        compare_results(b"{ }");
        compare_results(b"[ ]");
        compare_results(b"{ \"a\" : 1 }");
        compare_results(b"[\n 1,\n 2\n]");
    }

    /// Inputs whose lengths sit on and around multiples of 16 bytes.
    #[test]
    fn test_simd_boundary_sizes() {
        for size in [1, 15, 16, 17, 31, 32, 33, 47, 48, 49, 63, 64, 65] {
            let mut json = Vec::with_capacity(size);
            json.push(b'[');
            for i in 0..size.saturating_sub(2) {
                json.push(if i > 0 && i % 2 == 0 { b',' } else { b'1' });
            }
            if json.len() < size {
                json.push(b']');
            }
            // For every size >= 2 the buffer is already exactly `size` bytes
            // long here, so the padding and truncation below change nothing;
            // they only guarantee the final length.
            while json.len() < size {
                json.insert(1, b' ');
            }
            json.truncate(size);
            // For size == 1 the buffer is just "[", which is rewritten to "]".
            if let Some(last) = json.last_mut() {
                if !matches!(*last, b']' | b' ') {
                    *last = b']';
                }
            }
            compare_results(&json);
        }
    }

    /// Documents longer than 16 bytes.
    #[test]
    fn test_simd_large_json() {
        compare_results(br#"{"name":"value","number":12345,"array":[1,2,3]}"#);
        compare_results(br#"{"long_key_name":"long_value_string","another":"field"}"#);
        compare_results(
            br#"{"items":[{"id":1,"name":"first"},{"id":2,"name":"second"},{"id":3,"name":"third"}]}"#,
        );
    }

    /// Documents mixing every structural character and value kind.
    #[test]
    fn test_simd_all_characters() {
        compare_results(br#"{"a":{"b":[1,2]},"c":[{"d":3}]}"#);
        compare_results(br#"[1,-2,3.14,-1.5e+10,true,false,null,"str"]"#);
    }

    /// String values longer than, and exactly, 16 bytes.
    #[test]
    fn test_simd_long_strings() {
        compare_results(br#"{"key":"this is a very long string value that exceeds 16 bytes"}"#);
        compare_results(br#"{"a":"0123456789012345","b":"0123456789012345"}"#);
    }

    /// Slides an escaped quote across byte offsets by growing the key with
    /// 0..20 bytes of padding.
    #[test]
    fn test_simd_escape_at_boundary() {
        for padding in 0..20 {
            let mut json = br#"{""#.to_vec();
            json.extend(std::iter::repeat_n(b'x', padding));
            json.extend_from_slice(br#"":"a\"b"}"#);
            compare_results(&json);
        }
    }

    /// Compares both builders on 100 pseudo-random documents from a
    /// fixed-seed generator, so failures are reproducible.
    #[test]
    fn test_simd_random_valid_json() {
        use rand_chacha::ChaCha8Rng;

        let mut rng = ChaCha8Rng::seed_from_u64(42);
        for _ in 0..100 {
            let json = generate_random_json(&mut rng, 3, 50);
            compare_results(&json);
        }
    }

    /// Generates one random JSON value nested at most `max_depth` levels.
    /// Once the output has reached `max_size` bytes only scalars are emitted.
    fn generate_random_json<R: Rng>(rng: &mut R, max_depth: usize, max_size: usize) -> Vec<u8> {
        let mut result = Vec::new();
        generate_random_value(rng, &mut result, max_depth, max_size);
        result
    }

    /// Appends a scalar to `out`: `null`, `true`, `false`, an integer in
    /// `-100..100` (for `kind` 0 to 3), or otherwise a quoted string of 0 to 9
    /// lowercase ASCII letters.
    fn push_random_scalar<R: Rng>(rng: &mut R, out: &mut Vec<u8>, kind: i32) {
        match kind {
            0 => out.extend_from_slice(b"null"),
            1 => out.extend_from_slice(b"true"),
            2 => out.extend_from_slice(b"false"),
            3 => out.extend_from_slice(format!("{}", rng.gen_range(-100..100)).as_bytes()),
            _ => {
                out.push(b'"');
                let len = rng.gen_range(0..10);
                for _ in 0..len {
                    out.push(rng.gen_range(b'a'..=b'z'));
                }
                out.push(b'"');
            }
        }
    }

    /// Appends one random value to `out`.
    ///
    /// When `depth` is 0 or `out` already holds at least `max_size` bytes the
    /// value is always a scalar; otherwise it may also be an array of up to 3
    /// values or an object of up to 2 single-letter-keyed members, generated
    /// recursively with `depth - 1`.
    fn generate_random_value<R: Rng>(
        rng: &mut R,
        out: &mut Vec<u8>,
        depth: usize,
        max_size: usize,
    ) {
        if out.len() >= max_size || depth == 0 {
            let kind = rng.gen_range(0..5);
            push_random_scalar(rng, out, kind);
            return;
        }

        match rng.gen_range(0..7) {
            kind @ 0..=4 => push_random_scalar(rng, out, kind),
            5 => {
                out.push(b'[');
                let count = rng.gen_range(0..4);
                for i in 0..count {
                    if i > 0 {
                        out.push(b',');
                    }
                    generate_random_value(rng, out, depth - 1, max_size);
                }
                out.push(b']');
            }
            _ => {
                out.push(b'{');
                let count = rng.gen_range(0..3);
                for i in 0..count {
                    if i > 0 {
                        out.push(b',');
                    }
                    out.push(b'"');
                    out.push(rng.gen_range(b'a'..=b'z'));
                    out.push(b'"');
                    out.push(b':');
                    generate_random_value(rng, out, depth - 1, max_size);
                }
                out.push(b'}');
            }
        }
    }
}
/// Tests that the BP bits produced by the builders can be navigated with
/// `BalancedParens`.
mod bp_structure {
    use super::*;
    use succinctly::bp::BalancedParens;

    /// `{}` yields the two BP bits `10`: an open at 0 closed at 1.
    #[test]
    fn test_simple_bp_navigation() {
        let semi = standard::build_semi_index(b"{}");
        assert_eq!(bits_to_string(&semi.bp, 2), "10");

        let bp = BalancedParens::new(semi.bp, 2);
        assert!(bp.is_open(0));
        assert!(!bp.is_open(1));
        assert_eq!(bp.find_close(0), Some(1));
    }

    /// For `{"a":1}` the outermost open at 0 closes at position 5 when the
    /// BP is treated as 6 bits long.
    #[test]
    fn test_nested_bp_navigation() {
        let semi = standard::build_semi_index(br#"{"a":1}"#);
        let bp_actual_len = 6;
        // `semi` is not read again, so the BP words are moved, not cloned.
        let bp = BalancedParens::new(semi.bp, bp_actual_len);

        assert!(bp.is_open(0));
        assert_eq!(bp.find_close(0), Some(5));
    }

    /// For `[1,2]` the array spans 0..=5 and the two elements occupy the
    /// pairs (1, 2) and (3, 4).
    #[test]
    fn test_array_bp_navigation() {
        let semi = standard::build_semi_index(b"[1,2]");
        // `semi` is not read again, so the BP words are moved, not cloned.
        let bp = BalancedParens::new(semi.bp, 6);

        assert!(bp.is_open(0));
        assert_eq!(bp.find_close(0), Some(5));
        assert!(bp.is_open(1));
        assert_eq!(bp.find_close(1), Some(2));
        assert!(bp.is_open(3));
        assert_eq!(bp.find_close(3), Some(4));
    }

    /// Four nested empty arrays through the simple builder: 16 BP bits with
    /// matching pairs (0, 15), (2, 13) and (6, 9).
    #[test]
    fn test_deeply_nested_bp() {
        let semi = simple::build_semi_index(b"[[[[]]]]");
        let bp_len = 16;
        // `semi` is not read again, so the BP words are moved, not cloned.
        let bp = BalancedParens::new(semi.bp, bp_len);

        assert!(bp.is_open(0));
        assert_eq!(bp.find_close(0), Some(15));
        assert!(bp.is_open(2));
        assert_eq!(bp.find_close(2), Some(13));
        assert!(bp.is_open(6));
        assert_eq!(bp.find_close(6), Some(9));
    }
}
/// Edge-case inputs for both the simple and the standard builders.
mod edge_cases {
    use super::*;

    /// True when `words` is empty or its first word has no bits set.
    fn first_word_is_clear(words: &[u64]) -> bool {
        words.first().copied().unwrap_or(0) == 0
    }

    /// Empty input produces no set bits in the first word of IB or BP.
    #[test]
    fn test_empty_input() {
        let simple_semi = simple::build_semi_index(b"");
        assert!(first_word_is_clear(&simple_semi.ib));
        assert!(first_word_is_clear(&simple_semi.bp));

        let standard_semi = standard::build_semi_index(b"");
        assert!(first_word_is_clear(&standard_semi.ib));
        assert!(first_word_is_clear(&standard_semi.bp));
    }

    /// A lone `{` sets the first IB bit in both builders.
    #[test]
    fn test_single_characters() {
        let simple_ib = simple::build_semi_index(b"{").ib;
        assert_eq!(bits_to_string(&simple_ib, 1), "1");

        let standard_ib = standard::build_semi_index(b"{").ib;
        assert_eq!(bits_to_string(&standard_ib, 1), "1");
    }

    /// Whitespace-only input sets none of the first three IB bits.
    #[test]
    fn test_only_whitespace() {
        let simple_ib = simple::build_semi_index(b" ").ib;
        assert_eq!(count_ones(&simple_ib, 3), 0);

        let standard_ib = standard::build_semi_index(b" ").ib;
        assert_eq!(count_ones(&standard_ib, 3), 0);
    }

    /// Runs of escapes inside a string leave both builders in `InJson`.
    #[test]
    fn test_consecutive_escapes() {
        // Two escaped backslashes in a row.
        let double_backslash = br#"{"a":"\\\\"}"#;
        assert_eq!(
            simple::build_semi_index(double_backslash).state,
            simple::State::InJson
        );
        assert_eq!(
            standard::build_semi_index(double_backslash).state,
            standard::State::InJson
        );

        // Escaped quotes at both ends of the string content.
        let quoted = br#"{"a":"\"test\""}"#;
        assert_eq!(
            simple::build_semi_index(quoted).state,
            simple::State::InJson
        );
        assert_eq!(
            standard::build_semi_index(quoted).state,
            standard::State::InJson
        );
    }

    /// A 1000-byte string value leaves the standard builder in `InJson`.
    #[test]
    fn test_long_string() {
        let mut json = br#"{"data":""#.to_vec();
        json.resize(json.len() + 1000, b'x');
        json.extend_from_slice(br#""}"#);

        assert_eq!(
            standard::build_semi_index(&json).state,
            standard::State::InJson
        );
    }

    /// An array of 1000 single-digit values ends in `InJson` with 1001 IB
    /// bits set.
    #[test]
    fn test_many_small_values() {
        let json = format!("[{}]", vec!["1"; 1000].join(",")).into_bytes();

        let semi = standard::build_semi_index(&json);
        assert_eq!(semi.state, standard::State::InJson);
        assert_eq!(count_ones(&semi.ib, json.len()), 1001);
    }

    /// 100 nested empty arrays leave the standard builder in `InJson`.
    #[test]
    fn test_deep_nesting() {
        let depth = 100;
        let mut json = vec![b'['; depth];
        json.resize(depth * 2, b']');

        assert_eq!(
            standard::build_semi_index(&json).state,
            standard::State::InJson
        );
    }

    /// Zero, negative zero, fractions and every exponent spelling leave the
    /// standard builder in `InJson`.
    #[test]
    fn test_special_number_formats() {
        let inputs: [&[u8]; 8] = [
            b"[0]",
            b"[-0]",
            b"[0.0]",
            b"[1e0]",
            b"[1E0]",
            b"[1e+0]",
            b"[1e-0]",
            b"[1.0e+10]",
        ];
        for json in inputs {
            let final_state = standard::build_semi_index(json).state;
            assert_eq!(
                final_state,
                standard::State::InJson,
                "Failed for: {}",
                String::from_utf8_lossy(json)
            );
        }
    }
}
mod locate_exhaustive {
use super::*;
fn test_all_offsets(json: &[u8], description: &str) {
let index = JsonIndex::build(json);
let mut failures = Vec::new();
for offset in 0..json.len() {
let result = locate_offset(&index, json, offset);
let ch = json[offset];
let is_structural =
ch == b'{' || ch == b'}' || ch == b'[' || ch == b']' || ch == b':' || ch == b',';
let is_whitespace = ch == b' ' || ch == b'\n' || ch == b'\t' || ch == b'\r';
if result.is_none() && !is_structural && !is_whitespace {
failures.push((offset, ch));
}
}
assert!(
failures.is_empty(),
"{}: Failed to locate {} offsets: {:?}",
description,
failures.len(),
failures
.iter()
.take(10)
.map(|(o, c)| format!("{}:'{}' ", o, *c as char))
.collect::<String>()
);
}
#[test]
fn test_locate_wide_array_100() {
let mut json = br#"{"data":["#.to_vec();
for i in 0..100 {
if i > 0 {
json.push(b',');
}
json.extend(i.to_string().as_bytes());
}
json.extend(b"]}");
test_all_offsets(&json, "Wide array (100 elements)");
}
#[test]
fn test_locate_wide_array_500() {
let mut json = br#"{"data":["#.to_vec();
for i in 0..500 {
if i > 0 {
json.push(b',');
}
json.extend(i.to_string().as_bytes());
}
json.extend(b"]}");
test_all_offsets(&json, "Wide array (500 elements)");
}
#[test]
fn test_locate_deep_nesting_20() {
let mut json = Vec::new();
for i in 0..20 {
json.extend(format!(r#"{{"level{}":"v{}","n":"#, i, i).as_bytes());
}
json.extend(br#""bottom""#);
json.resize(json.len() + 20, b'}');
test_all_offsets(&json, "Deep nesting (20 levels)");
}
#[test]
fn test_locate_mixed_structure_100_users() {
let mut json = br#"{"users":["#.to_vec();
for i in 0..100 {
if i > 0 {
json.push(b',');
}
json.extend(
format!(
r#"{{"id":{},"name":"user{}","tags":["a","b"],"meta":{{"x":{}}}}}"#,
i, i, i
)
.as_bytes(),
);
}
json.extend(b"]}");
test_all_offsets(&json, "Mixed structure (100 users)");
}
#[test]
fn test_locate_string_heavy() {
let mut json = br#"{"strings":["#.to_vec();
for i in 0..200 {
if i > 0 {
json.push(b',');
}
let s = "x".repeat(i % 50 + 1);
json.extend(format!(r#""{}""#, s).as_bytes());
}
json.extend(b"]}");
test_all_offsets(&json, "String-heavy (200 strings)");
}
#[test]
fn test_locate_original_regression_file() {
let json = include_str!("testdata/locate_regression.json");
test_all_offsets(json.as_bytes(), "Original regression file (5KB)");
}
#[test]
fn test_locate_generated_10kb() {
let mut json = br#"{"data":{"records":["#.to_vec();
for i in 0..150 {
if i > 0 {
json.push(b',');
}
json.extend(
format!(
r#"{{"id":{},"value":"item_{}","nested":{{"a":{},"b":{},"c":[1,2,3]}}}}"#,
i,
i,
i * 10,
i * 20
)
.as_bytes(),
);
}
json.extend(b"]}}");
assert!(
json.len() > 10000,
"Generated JSON should be >10KB, got {}",
json.len()
);
test_all_offsets(&json, "Generated 10KB+ JSON");
}
}