#![allow(clippy::items_after_test_module)]
#[cfg(not(test))]
use alloc::vec;
#[cfg(not(test))]
use alloc::{borrow::Cow, string::String, vec::Vec};
#[cfg(test)]
use std::borrow::Cow;
use crate::trees::BalancedParens;
use crate::util::broadword::select_in_word;
/// Navigation index over a JSON text.
///
/// Combines an interest-bit vector (`ib`), a balanced-parentheses tree (`bp`)
/// and an optional line-start bitmap (`newlines`). `W` is the storage for the
/// bit words; the methods in this file only require `W: AsRef<[u64]>`.
#[derive(Clone, Debug)]
pub struct JsonIndex<W = Vec<u64>> {
/// Interest-bit words.
ib: W,
/// Number of valid bits in `ib`; select results at or beyond this are rejected.
ib_len: usize,
/// Cumulative popcount of `ib`: entry `i` is the number of set bits in words `0..i`.
ib_rank: Vec<u32>,
/// Balanced-parentheses structure used for tree navigation.
bp: BalancedParens<W>,
/// Line-start bitmap; left empty when the index is assembled via `from_parts`.
newlines: crate::bits::BitVec,
}
/// Builds the cumulative popcount table for `words`.
///
/// The result has `words.len() + 1` entries: entry `i` holds the number of set
/// bits in `words[..i]`, so the first entry is always `0`.
fn build_ib_rank(words: &[u64]) -> Vec<u32> {
    let mut table = Vec::with_capacity(words.len() + 1);
    table.push(0u32);
    let mut running = 0u32;
    table.extend(words.iter().map(|word| {
        running += word.count_ones();
        running
    }));
    table
}
/// Builds a bitmap with one set bit at the byte offset where each line after
/// the first begins.
///
/// `\n`, `\r\n` and a lone `\r` each count as one line terminator. A
/// terminator at the very end of `text` produces no bit, because no byte
/// follows it. Empty input yields an empty bit vector.
fn build_newline_index(text: &[u8]) -> crate::bits::BitVec {
    if text.is_empty() {
        return crate::bits::BitVec::new();
    }
    let len = text.len();
    let mut words = vec![0u64; len.div_ceil(64)];
    let mut pos = 0;
    while pos < len {
        // Byte length of the line terminator starting at `pos` (0 when none).
        let terminator_len = match text[pos] {
            b'\n' => 1,
            b'\r' if text.get(pos + 1) == Some(&b'\n') => 2,
            b'\r' => 1,
            _ => 0,
        };
        if terminator_len == 0 {
            pos += 1;
            continue;
        }
        pos += terminator_len;
        // `pos` is now the first byte of the next line, if there is one.
        if pos < len {
            words[pos / 64] |= 1u64 << (pos % 64);
        }
    }
    crate::bits::BitVec::from_words(words, len)
}
impl JsonIndex<Vec<u64>> {
/// Builds an owned index for `json`.
///
/// On `aarch64` and `x86_64` the semi-index comes from the SIMD builder; on
/// every other architecture the scalar builder is used. The rank table and
/// the line-start bitmap are derived here.
pub fn build(json: &[u8]) -> Self {
#[cfg(any(target_arch = "aarch64", target_arch = "x86_64"))]
let semi = crate::json::simd::build_semi_index_standard(json);
#[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))]
let semi = crate::json::standard::build_semi_index(json);
// One interest bit per input byte.
let ib_len = json.len();
// BP length is taken as twice the number of set (open) bits.
let bp_bit_count = count_bp_bits(&semi.bp);
let ib_rank = build_ib_rank(&semi.ib);
let newlines = build_newline_index(json);
Self {
ib: semi.ib,
ib_len,
ib_rank,
bp: BalancedParens::new(semi.bp, bp_bit_count),
newlines,
}
}
}
impl<W: AsRef<[u64]>> JsonIndex<W> {
pub fn from_parts(ib: W, ib_len: usize, bp: W, bp_len: usize) -> Self {
let ib_rank = build_ib_rank(ib.as_ref());
Self {
ib,
ib_len,
ib_rank,
bp: BalancedParens::from_words(bp, bp_len),
newlines: crate::bits::BitVec::new(),
}
}
/// Assembles an index from pre-built interest-bit words, BP words and a
/// caller-supplied line-start bitmap.
///
/// The rank table over `ib` is recomputed here; all other parts are stored
/// as given.
pub fn from_parts_with_newlines(
    ib: W,
    ib_len: usize,
    bp: W,
    bp_len: usize,
    newlines: crate::bits::BitVec,
) -> Self {
    Self {
        // Computed before `ib` is moved into the struct.
        ib_rank: build_ib_rank(ib.as_ref()),
        ib,
        ib_len,
        bp: BalancedParens::from_words(bp, bp_len),
        newlines,
    }
}
/// Returns the interest-bit words.
#[inline]
pub fn ib(&self) -> &[u64] {
self.ib.as_ref()
}
/// Returns the number of valid interest bits.
#[inline]
pub fn ib_len(&self) -> usize {
self.ib_len
}
/// Returns the balanced-parentheses structure.
#[inline]
pub fn bp(&self) -> &BalancedParens<W> {
&self.bp
}
/// Converts a byte `offset` into a 1-based `(line, column)` pair.
///
/// Columns are counted in bytes from the start of the line. When the index
/// carries no line information, everything is reported on line 1.
#[inline]
pub fn to_line_column(&self, offset: usize) -> (usize, usize) {
    use crate::RankSelect;
    if self.newlines.is_empty() {
        return (1, offset + 1);
    }
    // Every line-start marker at or before `offset` moves us one line down.
    let line = 1 + self.newlines.rank1(offset + 1);
    let line_start = match line {
        1 => 0,
        _ => self.newlines.select1(line - 2).unwrap_or(0),
    };
    (line, offset - line_start + 1)
}
/// Converts a 1-based `(line, column)` pair into a byte offset.
///
/// Returns `None` when either coordinate is zero, when the line does not
/// exist in the line-start bitmap, or when the resulting offset is not
/// below `ib_len`.
#[inline]
pub fn to_offset(&self, line: usize, column: usize) -> Option<usize> {
    use crate::RankSelect;
    if line == 0 || column == 0 {
        return None;
    }
    let line_start = match line {
        1 => 0,
        _ => self.newlines.select1(line - 2)?,
    };
    Some(line_start + column - 1).filter(|&offset| offset < self.ib_len)
}
#[inline]
pub fn root<'a>(&'a self, text: &'a [u8]) -> JsonCursor<'a, W> {
JsonCursor {
text,
index: self,
bp_pos: 0,
}
}
/// Returns the bit position of the `k`-th (0-based) set interest bit,
/// starting the search near word index `hint`.
///
/// The search gallops away from `hint` in doubling steps to bracket the word
/// containing the answer, then binary-searches the rank table inside that
/// bracket. A poor hint costs time only; the result is the same as
/// [`Self::ib_select1`].
///
/// Returns `None` when there are no words, when fewer than `k + 1` bits are
/// set, or when the located bit lies at or beyond `ib_len`.
#[inline]
pub fn ib_select1_from(&self, k: usize, hint: usize) -> Option<usize> {
    let words = self.ib.as_ref();
    if words.is_empty() {
        return None;
    }
    // The rank table stores `u32` counts, so a rank above `u32::MAX` cannot
    // exist. Reject it instead of letting the cast alias it to a small rank.
    if k > u32::MAX as usize {
        return None;
    }
    let k32 = k as u32;
    let n = words.len();
    let hint = hint.min(n.saturating_sub(1));
    let hint_rank = self.ib_rank[hint + 1];

    // Bracket the target word index in `[lo, hi]`.
    let (mut lo, mut hi) = if hint_rank <= k32 {
        // Target is after `hint`: gallop forward.
        let mut bound = 1usize;
        let mut prev = hint;
        loop {
            let next = (hint + bound).min(n);
            if next >= n || self.ib_rank[next + 1] > k32 {
                break (prev, next);
            }
            prev = next;
            bound *= 2;
        }
    } else {
        // Target is at or before `hint`: gallop backward.
        let mut bound = 1usize;
        let mut prev = hint;
        loop {
            let next = hint.saturating_sub(bound);
            if next == 0 || self.ib_rank[next + 1] <= k32 {
                break (next, prev);
            }
            prev = next;
            bound *= 2;
        }
    };

    // First word whose cumulative count exceeds `k`.
    while lo < hi {
        let mid = lo + (hi - lo) / 2;
        if self.ib_rank[mid + 1] <= k32 {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }
    if lo >= n {
        return None;
    }
    // Rank of the target bit within its word.
    let remaining = k - self.ib_rank[lo] as usize;
    let word = words[lo];
    let bit_pos = select_in_word(word, remaining as u32) as usize;
    let result = lo * 64 + bit_pos;
    if result < self.ib_len {
        Some(result)
    } else {
        None
    }
}
/// Returns the bit position of the `k`-th (0-based) set interest bit.
///
/// Binary-searches the cumulative rank table for the word containing the
/// bit, then selects within that word.
///
/// Returns `None` when there are no words, when fewer than `k + 1` bits are
/// set, or when the located bit lies at or beyond `ib_len`.
#[inline]
pub fn ib_select1(&self, k: usize) -> Option<usize> {
    let words = self.ib.as_ref();
    if words.is_empty() {
        return None;
    }
    // The rank table stores `u32` counts, so a rank above `u32::MAX` cannot
    // exist. Reject it instead of letting the cast alias it to a small rank.
    if k > u32::MAX as usize {
        return None;
    }
    let k32 = k as u32;
    let n = words.len();

    // First word whose cumulative count exceeds `k`.
    let mut lo = 0usize;
    let mut hi = n;
    while lo < hi {
        let mid = lo + (hi - lo) / 2;
        if self.ib_rank[mid + 1] <= k32 {
            lo = mid + 1;
        } else {
            hi = mid;
        }
    }
    if lo >= n {
        return None;
    }
    // Rank of the target bit within its word.
    let remaining = k - self.ib_rank[lo] as usize;
    let word = words[lo];
    let bit_pos = select_in_word(word, remaining as u32) as usize;
    let result = lo * 64 + bit_pos;
    if result < self.ib_len {
        Some(result)
    } else {
        None
    }
}
/// Counts the set interest bits in positions `[0, pos)`.
///
/// Positions past the last word are clamped: the result is then the total
/// number of set bits.
pub fn ib_rank1(&self, pos: usize) -> usize {
    if pos == 0 {
        return 0;
    }
    let words = self.ib.as_ref();
    let (word_idx, bit_idx) = (pos / 64, pos % 64);
    // Bits in all complete words before `word_idx`.
    let whole_words = self.ib_rank[word_idx.min(words.len())] as usize;
    // Bits in the low `bit_idx` positions of the partially covered word.
    let partial = match words.get(word_idx) {
        Some(&word) if bit_idx > 0 => (word & ((1u64 << bit_idx) - 1)).count_ones() as usize,
        _ => 0,
    };
    whole_words + partial
}
}
/// Returns twice the number of set bits in `bp_words`.
///
/// The caller uses this as the bit length of the balanced-parentheses
/// sequence (presumably one close bit per open bit).
fn count_bp_bits(bp_words: &[u64]) -> usize {
    let mut opens = 0usize;
    for word in bp_words {
        opens += word.count_ones() as usize;
    }
    opens * 2
}
/// Lightweight handle to one node of an indexed JSON document.
///
/// Holds only references and a position, so it is cheap to copy.
#[derive(Debug)]
pub struct JsonCursor<'a, W = Vec<u64>> {
/// The JSON bytes that values are read from.
text: &'a [u8],
/// The index used for navigation.
index: &'a JsonIndex<W>,
/// Position of this node in the balanced-parentheses sequence.
bp_pos: usize,
}
// Implemented by hand rather than derived so that no `W: Clone` / `W: Copy`
// bound is required: the cursor only stores references to `W`-based data.
impl<'a, W> Clone for JsonCursor<'a, W> {
fn clone(&self) -> Self {
*self
}
}
impl<'a, W> Copy for JsonCursor<'a, W> {}
impl<'a, W: AsRef<[u64]>> JsonCursor<'a, W> {
/// Creates a cursor at an explicit balanced-parentheses position.
///
/// `bp_pos` is stored as given; no validation is performed here.
#[inline]
pub fn from_bp_position(index: &'a JsonIndex<W>, text: &'a [u8], bp_pos: usize) -> Self {
Self {
text,
index,
bp_pos,
}
}
/// Returns this cursor's position in the balanced-parentheses sequence.
#[inline]
pub fn bp_position(&self) -> usize {
self.bp_pos
}
/// Returns `true` when the BP tree reports a first child for this node.
///
/// NOTE(review): a node without children yields `false`; presumably this
/// includes empty `{}` / `[]` — confirm against `BalancedParens::first_child`.
#[inline]
pub fn is_container(&self) -> bool {
self.index.bp().first_child(self.bp_pos).is_some()
}
/// Returns the byte offset in the text where this node's value starts.
///
/// The node's ordinal is its `rank1` in the BP sequence; the matching
/// interest bit is located with a hinted select (the hint is `rank / 8`).
/// `None` means the select found no such bit.
pub fn text_position(&self) -> Option<usize> {
    let ordinal = self.index.bp().rank1(self.bp_pos);
    self.index.ib_select1_from(ordinal, ordinal / 8)
}
/// Returns a cursor at this node's first child, or `None` if the BP tree
/// reports no child.
#[inline]
pub fn first_child(&self) -> Option<JsonCursor<'a, W>> {
    self.index
        .bp()
        .first_child(self.bp_pos)
        .map(|bp_pos| JsonCursor { bp_pos, ..*self })
}
/// Returns a cursor at this node's next sibling, or `None` if the BP tree
/// reports none.
#[inline]
pub fn next_sibling(&self) -> Option<JsonCursor<'a, W>> {
    self.index
        .bp()
        .next_sibling(self.bp_pos)
        .map(|bp_pos| JsonCursor { bp_pos, ..*self })
}
/// Returns a cursor at this node's parent, or `None` if the BP tree reports
/// none.
#[inline]
pub fn parent(&self) -> Option<JsonCursor<'a, W>> {
    self.index
        .bp()
        .parent(self.bp_pos)
        .map(|bp_pos| JsonCursor { bp_pos, ..*self })
}
pub fn value(&self) -> StandardJson<'a, W> {
let Some(text_pos) = self.text_position() else {
return StandardJson::Error("invalid cursor position");
};
if text_pos >= self.text.len() {
return StandardJson::Error("text position out of bounds");
}
match self.text[text_pos] {
b'{' => StandardJson::Object(JsonFields::from_object_cursor(*self)),
b'[' => StandardJson::Array(JsonElements::from_array_cursor(*self)),
b'"' => StandardJson::String(JsonString {
text: self.text,
start: text_pos,
}),
b't' | b'f' => {
if self.text[text_pos..].starts_with(b"true") {
StandardJson::Bool(true)
} else if self.text[text_pos..].starts_with(b"false") {
StandardJson::Bool(false)
} else {
StandardJson::Error("invalid boolean")
}
}
b'n' => {
if self.text[text_pos..].starts_with(b"null") {
StandardJson::Null
} else {
StandardJson::Error("invalid null")
}
}
c if c == b'-' || c.is_ascii_digit() => StandardJson::Number(JsonNumber {
text: self.text,
start: text_pos,
}),
_ => StandardJson::Error("unexpected character"),
}
}
/// Returns an iterator over this node's direct children, starting at the
/// first child and following next-sibling links.
#[inline]
pub fn children(&self) -> JsonChildren<'a, W> {
JsonChildren {
current: self.first_child(),
}
}
/// Returns the half-open byte range `(start, end)` of this node's value in
/// the text, found by scanning the text from the value's first byte.
///
/// Returns `None` when the cursor has no text position, when the position
/// is outside the text, when a container is never closed, when a literal
/// does not spell `true` / `false` / `null`, or when the first byte starts
/// no known value kind. An unterminated string yields a range that ends at
/// the end of the text.
pub fn text_range(&self) -> Option<(usize, usize)> {
let start = self.text_position()?;
if start >= self.text.len() {
return None;
}
let end = match self.text[start] {
b'{' | b'[' => {
// Only the bracket kind that opened this value is depth-counted.
let close_char = if self.text[start] == b'{' { b'}' } else { b']' };
let mut depth = 1u32;
let mut i = start + 1;
while i < self.text.len() {
match self.text[i] {
b'"' => {
// Skip the whole string so brackets inside it are not counted.
i += 1;
while i < self.text.len() {
match self.text[i] {
b'"' => {
i += 1;
break;
}
// A backslash escapes the next byte; step over both.
b'\\' => i += 2,
_ => i += 1,
}
}
}
c if c == self.text[start] => {
depth += 1;
i += 1;
}
c if c == close_char => {
depth -= 1;
if depth == 0 {
return Some((start, i + 1));
}
i += 1;
}
_ => i += 1,
}
}
// Ran off the end of the text without closing the container.
return None;
}
b'"' => {
let mut i = start + 1;
while i < self.text.len() {
match self.text[i] {
b'"' => return Some((start, i + 1)),
// A backslash escapes the next byte; step over both.
b'\\' => i += 2,
_ => i += 1,
}
}
// Unterminated string: the range runs to the end of the text.
self.text.len()
}
b't' => {
if self.text[start..].starts_with(b"true") {
start + 4
} else {
return None;
}
}
b'f' => {
if self.text[start..].starts_with(b"false") {
start + 5
} else {
return None;
}
}
b'n' => {
if self.text[start..].starts_with(b"null") {
start + 4
} else {
return None;
}
}
c if c == b'-' || c.is_ascii_digit() => {
// Consume every byte that can appear in a JSON number; the shape of
// the number is not validated here.
let mut i = start;
while i < self.text.len() {
match self.text[i] {
b'0'..=b'9' | b'-' | b'+' | b'.' | b'e' | b'E' => i += 1,
_ => break,
}
}
i
}
_ => return None,
};
Some((start, end))
}
/// Returns the raw bytes of this node's value, exactly as they appear in
/// the text, or `None` when `text_range` cannot delimit the value.
pub fn raw_bytes(&self) -> Option<&'a [u8]> {
    let text = self.text;
    self.text_range().map(|(start, end)| &text[start..end])
}
/// Returns a cursor for the node whose value starts at `offset`, or, if no
/// value starts exactly there, for the last node starting before `offset`.
///
/// Returns `None` when `offset` is outside the text, when no node starts at
/// or before `offset`, or when the matching BP position cannot be found.
pub fn cursor_at_offset(&self, offset: usize) -> Option<JsonCursor<'a, W>> {
    if offset >= self.text.len() {
        return None;
    }
    let index = self.index;
    // Number of interest bits strictly before `offset`.
    let ones_before = index.ib_rank1(offset);
    // 0-based ordinal of the node at, or last before, `offset`.
    let ib_idx = if index.ib_select1(ones_before) == Some(offset) {
        ones_before
    } else {
        ones_before.checked_sub(1)?
    };
    let bp = index.bp();
    let bp_len = bp.len();
    if bp_len == 0 {
        return None;
    }
    // Smallest BP position whose inclusive rank exceeds `ib_idx`.
    let (mut lo, mut hi) = (0, bp_len);
    while lo < hi {
        let mid = lo + (hi - lo) / 2;
        if bp.rank1(mid + 1) > ib_idx {
            hi = mid;
        } else {
            lo = mid + 1;
        }
    }
    if lo >= bp_len || bp.rank1(lo + 1) != ib_idx + 1 {
        return None;
    }
    Some(JsonCursor {
        text: self.text,
        index,
        bp_pos: lo,
    })
}
/// Returns a cursor for the node at the 1-based `(line, col)` position.
///
/// The position is converted to a byte offset via the index and then
/// resolved with `cursor_at_offset`; `None` if either step fails.
pub fn cursor_at_position(&self, line: usize, col: usize) -> Option<JsonCursor<'a, W>> {
    self.index
        .to_offset(line, col)
        .and_then(|offset| self.cursor_at_offset(offset))
}
}
/// Iterator over the direct children of a node, yielding one cursor per child.
#[derive(Debug)]
pub struct JsonChildren<'a, W = Vec<u64>> {
/// The child to yield next; `None` once the siblings are exhausted.
current: Option<JsonCursor<'a, W>>,
}
// Implemented by hand so that no `W: Clone` / `W: Copy` bound is required.
impl<'a, W> Clone for JsonChildren<'a, W> {
fn clone(&self) -> Self {
*self
}
}
impl<'a, W> Copy for JsonChildren<'a, W> {}
impl<'a, W: AsRef<[u64]>> Iterator for JsonChildren<'a, W> {
    type Item = JsonCursor<'a, W>;

    /// Yields the pending child and advances to its next sibling.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let yielded = self.current.take()?;
        self.current = yielded.next_sibling();
        Some(yielded)
    }
}
/// A JSON value as seen through a cursor.
///
/// Strings, numbers, objects and arrays are lazy views into the text; their
/// contents are decoded only on request.
#[derive(Clone, Debug)]
pub enum StandardJson<'a, W = Vec<u64>> {
/// A string, not yet unescaped.
String(JsonString<'a>),
/// A number, not yet parsed.
Number(JsonNumber<'a>),
/// An object, exposed as a sequence of fields.
Object(JsonFields<'a, W>),
/// An array, exposed as a sequence of elements.
Array(JsonElements<'a, W>),
/// A `true` / `false` literal.
Bool(bool),
/// The `null` literal.
Null,
/// The value could not be classified; carries a static description.
Error(&'static str),
}
/// Lazy view over the remaining fields of an object.
#[derive(Debug)]
pub struct JsonFields<'a, W = Vec<u64>> {
/// Cursor at the next key; `None` when no fields remain.
key_cursor: Option<JsonCursor<'a, W>>,
}
// Implemented by hand so that no `W: Clone` / `W: Copy` bound is required.
impl<'a, W> Clone for JsonFields<'a, W> {
fn clone(&self) -> Self {
*self
}
}
impl<'a, W> Copy for JsonFields<'a, W> {}
impl<'a, W: AsRef<[u64]>> JsonFields<'a, W> {
/// Creates a field view positioned at the first child of `object_cursor`.
fn from_object_cursor(object_cursor: JsonCursor<'a, W>) -> Self {
Self {
key_cursor: object_cursor.first_child(),
}
}
/// Returns `true` when no fields remain in this view.
#[inline]
pub fn is_empty(&self) -> bool {
self.key_cursor.is_none()
}
/// Splits off the first field, returning it together with a view of the
/// remaining fields.
///
/// Keys and values are consecutive siblings: the key is the current node,
/// the value is its next sibling. Returns `None` when no key remains or the
/// key has no value sibling.
pub fn uncons(&self) -> Option<(JsonField<'a, W>, JsonFields<'a, W>)> {
    let key_cursor = self.key_cursor?;
    let value_cursor = key_cursor.next_sibling()?;
    let remaining = JsonFields {
        key_cursor: value_cursor.next_sibling(),
    };
    Some((
        JsonField {
            key_cursor,
            value_cursor,
        },
        remaining,
    ))
}
/// Returns the value of the first field whose decoded key equals `name`.
///
/// Fields are scanned in document order. Keys that are not strings, or
/// whose escapes/UTF-8 cannot be decoded, are skipped: they cannot match,
/// but they must not hide a valid field that appears later in the object.
/// Returns `None` when no field matches.
pub fn find(&self, name: &str) -> Option<StandardJson<'a, W>> {
    let mut fields = *self;
    while let Some((field, rest)) = fields.uncons() {
        if let StandardJson::String(key) = field.key() {
            let matches_name = key.as_str().map_or(false, |decoded| decoded == name);
            if matches_name {
                return Some(field.value());
            }
        }
        fields = rest;
    }
    None
}
}
impl<'a, W: AsRef<[u64]>> Iterator for JsonFields<'a, W> {
    type Item = JsonField<'a, W>;

    /// Yields the next field and advances this view past it.
    fn next(&mut self) -> Option<Self::Item> {
        self.uncons().map(|(field, remaining)| {
            *self = remaining;
            field
        })
    }
}
/// One key/value pair of an object, held as two cursors.
#[derive(Debug)]
pub struct JsonField<'a, W = Vec<u64>> {
/// Cursor at the field's key.
key_cursor: JsonCursor<'a, W>,
/// Cursor at the field's value.
value_cursor: JsonCursor<'a, W>,
}
// Implemented by hand so that no `W: Clone` / `W: Copy` bound is required.
impl<'a, W> Clone for JsonField<'a, W> {
fn clone(&self) -> Self {
*self
}
}
impl<'a, W> Copy for JsonField<'a, W> {}
impl<'a, W: AsRef<[u64]>> JsonField<'a, W> {
/// Returns the field's key as a value (classified on each call).
#[inline]
pub fn key(&self) -> StandardJson<'a, W> {
self.key_cursor.value()
}
/// Returns the field's value (classified on each call).
#[inline]
pub fn value(&self) -> StandardJson<'a, W> {
self.value_cursor.value()
}
/// Returns the cursor positioned at the field's value.
#[inline]
pub fn value_cursor(&self) -> JsonCursor<'a, W> {
self.value_cursor
}
}
/// Lazy view over the remaining elements of an array.
#[derive(Debug)]
pub struct JsonElements<'a, W = Vec<u64>> {
/// Cursor at the next element; `None` when no elements remain.
element_cursor: Option<JsonCursor<'a, W>>,
}
// Implemented by hand so that no `W: Clone` / `W: Copy` bound is required.
impl<'a, W> Clone for JsonElements<'a, W> {
fn clone(&self) -> Self {
*self
}
}
impl<'a, W> Copy for JsonElements<'a, W> {}
impl<'a, W: AsRef<[u64]>> JsonElements<'a, W> {
/// Creates an element view positioned at the first child of `array_cursor`.
fn from_array_cursor(array_cursor: JsonCursor<'a, W>) -> Self {
Self {
element_cursor: array_cursor.first_child(),
}
}
/// Returns `true` when no elements remain in this view.
#[inline]
pub fn is_empty(&self) -> bool {
self.element_cursor.is_none()
}
/// Splits off the first element's value, returning it together with a view
/// of the remaining elements. Returns `None` when the view is empty.
pub fn uncons(&self) -> Option<(StandardJson<'a, W>, JsonElements<'a, W>)> {
    self.uncons_cursor()
        .map(|(head, remaining)| (head.value(), remaining))
}
/// Splits off the first element as a cursor (without classifying its
/// value), together with a view of the remaining elements. Returns `None`
/// when the view is empty.
pub fn uncons_cursor(&self) -> Option<(JsonCursor<'a, W>, JsonElements<'a, W>)> {
    self.element_cursor.map(|head| {
        let remaining = JsonElements {
            element_cursor: head.next_sibling(),
        };
        (head, remaining)
    })
}
/// Returns the value of the element at `index` (0-based), or `None` when
/// the view holds `index` or fewer elements.
///
/// Skipped elements are stepped over by sibling links only; their values
/// are never classified. This delegates to `get_fast`, which returns the
/// same result for every input.
pub fn get(&self, index: usize) -> Option<StandardJson<'a, W>> {
    self.get_fast(index)
}
/// Returns the value of the element at `index` (0-based) by following
/// `index` next-sibling links from the first element; only the target
/// element's value is classified. Returns `None` if the view is too short.
#[inline]
pub fn get_fast(&self, index: usize) -> Option<StandardJson<'a, W>> {
    let first = self.element_cursor?;
    (0..index)
        .try_fold(first, |cursor, _| cursor.next_sibling())
        .map(|cursor| cursor.value())
}
}
impl<'a, W: AsRef<[u64]>> Iterator for JsonElements<'a, W> {
    type Item = StandardJson<'a, W>;

    /// Yields the next element's value and advances this view past it.
    fn next(&mut self) -> Option<Self::Item> {
        self.uncons().map(|(value, remaining)| {
            *self = remaining;
            value
        })
    }
}
/// Iterator over array elements that yields cursors instead of values.
// NOTE(review): unlike the hand-written impls on the other views in this
// file, these derives make `Clone` / `Copy` conditional on `W: Clone` / `W: Copy`.
#[derive(Clone, Copy, Debug)]
pub struct ElementCursorIter<'a, W = Vec<u64>> {
/// The elements not yet yielded.
elements: JsonElements<'a, W>,
}
impl<'a, W: AsRef<[u64]>> ElementCursorIter<'a, W> {
/// Creates a cursor iterator over `elements`.
pub fn new(elements: JsonElements<'a, W>) -> Self {
Self { elements }
}
}
impl<'a, W: AsRef<[u64]>> Iterator for ElementCursorIter<'a, W> {
    type Item = JsonCursor<'a, W>;

    /// Yields the cursor of the next element and advances past it.
    fn next(&mut self) -> Option<Self::Item> {
        self.elements.uncons_cursor().map(|(cursor, remaining)| {
            self.elements = remaining;
            cursor
        })
    }
}
impl<'a, W: AsRef<[u64]>> JsonElements<'a, W> {
/// Converts this view into an iterator that yields element cursors.
pub fn cursor_iter(self) -> ElementCursorIter<'a, W> {
ElementCursorIter::new(self)
}
}
/// Lazy view of a JSON string; nothing is decoded until requested.
#[derive(Clone, Copy, Debug)]
pub struct JsonString<'a> {
/// The whole JSON text.
text: &'a [u8],
/// Offset of the string's opening quote within `text`.
start: usize,
}
impl<'a> JsonString<'a> {
/// Returns the string exactly as written in the text, from the opening
/// quote up to the end computed by `find_end`; escapes are not decoded.
pub fn raw_bytes(&self) -> &'a [u8] {
let end = self.find_end();
&self.text[self.start..end]
}
/// Returns the string's content with the quotes removed and escapes decoded.
///
/// Borrows directly from the text when the content contains no backslash;
/// otherwise allocates a decoded copy.
///
/// # Errors
///
/// `JsonError::InvalidUtf8` if the content is not valid UTF-8, or whatever
/// error `decode_escapes` reports for a malformed escape.
pub fn as_str(&self) -> Result<Cow<'a, str>, JsonError> {
    let content = &self.text[self.start + 1..self.find_string_end()];
    if content.contains(&b'\\') {
        return decode_escapes(content).map(Cow::Owned);
    }
    core::str::from_utf8(content)
        .map(Cow::Borrowed)
        .map_err(|_| JsonError::InvalidUtf8)
}
/// Returns the offset one past the closing quote.
///
/// For an unterminated string `find_string_end` returns `text.len()`, so the
/// result is clamped to the text length; otherwise `raw_bytes` would slice
/// one byte past the end of the text and panic.
fn find_end(&self) -> usize {
    (self.find_string_end() + 1).min(self.text.len())
}
/// Returns the offset of the closing quote, or `text.len()` when the string
/// is never closed.
fn find_string_end(&self) -> usize {
    // Start just past the opening quote.
    let mut pos = self.start + 1;
    while let Some(&byte) = self.text.get(pos) {
        match byte {
            b'"' => return pos,
            // A backslash escapes the next byte; step over both.
            b'\\' => pos += 2,
            _ => pos += 1,
        }
    }
    self.text.len()
}
}
/// Decodes JSON escape sequences in `bytes` (string content without the
/// surrounding quotes) into an owned `String`.
///
/// Handles the single-character escapes, `\uXXXX`, and `\uXXXX\uXXXX`
/// surrogate pairs. Unescaped runs are validated as UTF-8 and copied.
///
/// # Errors
///
/// - `JsonError::InvalidEscape` for a trailing backslash or an unknown escape.
/// - `JsonError::InvalidUnicodeEscape` for truncated or non-hex `\u` digits,
///   a high surrogate without a following low surrogate, or a lone low surrogate.
/// - `JsonError::InvalidUtf8` if an unescaped run is not valid UTF-8.
fn decode_escapes(bytes: &[u8]) -> Result<String, JsonError> {
let mut result = String::with_capacity(bytes.len());
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'\\' {
if i + 1 >= bytes.len() {
return Err(JsonError::InvalidEscape);
}
// Move onto the escape character itself.
i += 1;
match bytes[i] {
b'"' => result.push('"'),
b'\\' => result.push('\\'),
b'/' => result.push('/'),
b'b' => result.push('\u{0008}'), b'f' => result.push('\u{000C}'), b'n' => result.push('\n'),
b'r' => result.push('\r'),
b't' => result.push('\t'),
b'u' => {
// `i` is at the 'u'; four hex digits must follow.
if i + 4 >= bytes.len() {
return Err(JsonError::InvalidUnicodeEscape);
}
let hex = &bytes[i + 1..i + 5];
let codepoint = parse_hex4(hex)?;
// `i` now sits on the last hex digit.
i += 4;
if (0xD800..=0xDBFF).contains(&codepoint) {
// High surrogate: a `\uXXXX` low surrogate must follow immediately.
if i + 6 < bytes.len() && bytes[i + 1] == b'\\' && bytes[i + 2] == b'u' {
let low_hex = &bytes[i + 3..i + 7];
let low = parse_hex4(low_hex)?;
if (0xDC00..=0xDFFF).contains(&low) {
// Combine the pair into a supplementary-plane code point.
let cp = 0x10000
+ ((codepoint as u32 - 0xD800) << 10)
+ (low as u32 - 0xDC00);
if let Some(c) = char::from_u32(cp) {
result.push(c);
// Skip the second escape; `i` ends on its last hex digit.
i += 6; } else {
return Err(JsonError::InvalidUnicodeEscape);
}
} else {
return Err(JsonError::InvalidUnicodeEscape);
}
} else {
return Err(JsonError::InvalidUnicodeEscape);
}
} else if (0xDC00..=0xDFFF).contains(&codepoint) {
// A low surrogate with no preceding high surrogate is invalid.
return Err(JsonError::InvalidUnicodeEscape);
} else {
if let Some(c) = char::from_u32(codepoint as u32) {
result.push(c);
} else {
return Err(JsonError::InvalidUnicodeEscape);
}
}
}
_ => return Err(JsonError::InvalidEscape),
}
// Step past the final byte of the escape sequence.
i += 1;
} else {
// Copy the run of unescaped bytes up to the next backslash in one go.
let start = i;
while i < bytes.len() && bytes[i] != b'\\' {
i += 1;
}
let chunk =
core::str::from_utf8(&bytes[start..i]).map_err(|_| JsonError::InvalidUtf8)?;
result.push_str(chunk);
}
}
Ok(result)
}
/// Parses exactly four ASCII hex digits (either case) into a `u16`.
///
/// # Errors
///
/// `JsonError::InvalidUnicodeEscape` if `hex` is not four bytes long or
/// contains a byte that is not a hex digit.
fn parse_hex4(hex: &[u8]) -> Result<u16, JsonError> {
    if hex.len() != 4 {
        return Err(JsonError::InvalidUnicodeEscape);
    }
    hex.iter().try_fold(0u16, |acc, &byte| {
        let digit = (byte as char)
            .to_digit(16)
            .ok_or(JsonError::InvalidUnicodeEscape)?;
        Ok(acc * 16 + digit as u16)
    })
}
/// Lazy view of a JSON number; nothing is parsed until requested.
#[derive(Clone, Copy, Debug)]
pub struct JsonNumber<'a> {
/// The whole JSON text.
text: &'a [u8],
/// Offset of the number's first byte within `text`.
start: usize,
}
impl<'a> JsonNumber<'a> {
    /// Returns the number's bytes exactly as written in the text.
    pub fn raw_bytes(&self) -> &'a [u8] {
        &self.text[self.start..self.find_end()]
    }

    /// Parses the number as an `i64`.
    ///
    /// # Errors
    ///
    /// `JsonError::InvalidUtf8` if the bytes are not UTF-8, or
    /// `JsonError::InvalidNumber` if they do not parse as `i64` (for
    /// example when the number has a fraction or exponent).
    pub fn as_i64(&self) -> Result<i64, JsonError> {
        core::str::from_utf8(self.raw_bytes())
            .map_err(|_| JsonError::InvalidUtf8)?
            .parse()
            .map_err(|_| JsonError::InvalidNumber)
    }

    /// Parses the number as an `f64`.
    ///
    /// # Errors
    ///
    /// `JsonError::InvalidUtf8` if the bytes are not UTF-8, or
    /// `JsonError::InvalidNumber` if they do not parse as `f64`.
    pub fn as_f64(&self) -> Result<f64, JsonError> {
        core::str::from_utf8(self.raw_bytes())
            .map_err(|_| JsonError::InvalidUtf8)?
            .parse()
            .map_err(|_| JsonError::InvalidNumber)
    }

    /// Returns the offset one past the last byte that can belong to a
    /// number (digits, sign, `.`, `e`, `E`); the shape is not validated.
    fn find_end(&self) -> usize {
        let mut end = self.start;
        while let Some(b'0'..=b'9' | b'-' | b'+' | b'.' | b'e' | b'E') = self.text.get(end) {
            end += 1;
        }
        end
    }
}
/// Errors produced while decoding JSON strings and numbers.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum JsonError {
    /// The bytes are not valid UTF-8.
    InvalidUtf8,
    /// The bytes do not parse as the requested numeric type.
    InvalidNumber,
    /// A backslash is followed by an unsupported character, or by nothing.
    InvalidEscape,
    /// A `\u` escape is truncated, non-hex, or an invalid surrogate.
    InvalidUnicodeEscape,
}

impl core::fmt::Display for JsonError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let message = match self {
            JsonError::InvalidUtf8 => "invalid UTF-8 in string",
            JsonError::InvalidNumber => "invalid number format",
            JsonError::InvalidEscape => "invalid escape sequence in string",
            JsonError::InvalidUnicodeEscape => "invalid unicode escape sequence",
        };
        f.write_str(message)
    }
}
/// Index that owns its bit words.
pub type OwnedJsonIndex = JsonIndex<Vec<u64>>;
/// Index that borrows its bit words.
pub type BorrowedJsonIndex<'a> = JsonIndex<&'a [u64]>;
/// Cursor over an [`OwnedJsonIndex`].
pub type OwnedJsonCursor<'a> = JsonCursor<'a, Vec<u64>>;
/// Cursor over a [`BorrowedJsonIndex`].
pub type BorrowedJsonCursor<'a> = JsonCursor<'a, &'a [u64]>;
use crate::jq::document::{
DocumentCursor, DocumentElements, DocumentField, DocumentFields, DocumentValue,
};
/// Adapts [`JsonCursor`] to the generic `DocumentCursor` interface.
///
/// Navigation and lookup methods forward to the inherent methods of the
/// same name; the fully qualified `JsonCursor::…` calls make that explicit.
impl<'a, W: AsRef<[u64]> + Clone> DocumentCursor for JsonCursor<'a, W> {
type Value = StandardJson<'a, W>;
#[inline]
fn value(&self) -> Self::Value {
JsonCursor::value(self)
}
#[inline]
fn first_child(&self) -> Option<Self> {
JsonCursor::first_child(self)
}
#[inline]
fn next_sibling(&self) -> Option<Self> {
JsonCursor::next_sibling(self)
}
#[inline]
fn parent(&self) -> Option<Self> {
JsonCursor::parent(self)
}
#[inline]
fn is_container(&self) -> bool {
JsonCursor::is_container(self)
}
#[inline]
fn text_position(&self) -> Option<usize> {
JsonCursor::text_position(self)
}
#[inline]
fn cursor_at_offset(&self, offset: usize) -> Option<Self> {
JsonCursor::cursor_at_offset(self, offset)
}
#[inline]
fn cursor_at_position(&self, line: usize, col: usize) -> Option<Self> {
JsonCursor::cursor_at_position(self, line, col)
}
/// Writes the node's raw source bytes to `out` unchanged.
///
/// Fails with `core::fmt::Error` when the byte range cannot be determined
/// or the bytes are not valid UTF-8.
#[inline]
fn stream_json<Out: core::fmt::Write>(&self, out: &mut Out) -> core::fmt::Result {
if let Some(bytes) = self.raw_bytes() {
let s = core::str::from_utf8(bytes).map_err(|_| core::fmt::Error)?;
out.write_str(s)
} else {
Err(core::fmt::Error)
}
}
/// Writes the node as YAML, starting at indentation 0 and using
/// `indent_spaces` per nesting level.
#[inline]
fn stream_yaml<Out: core::fmt::Write>(
&self,
out: &mut Out,
indent_spaces: usize,
) -> core::fmt::Result {
stream_json_as_yaml(out, self.value(), 0, indent_spaces)
}
/// Returns `true` only for `null` and `false`.
#[inline]
fn is_falsy(&self) -> bool {
matches!(self.value(), StandardJson::Null | StandardJson::Bool(false))
}
}
/// Adapts [`StandardJson`] to the generic `DocumentValue` interface.
///
/// Each `as_*` accessor returns `Some` only for the matching variant; for
/// numbers and strings it additionally returns `None` when decoding fails.
impl<'a, W: AsRef<[u64]> + Clone> DocumentValue for StandardJson<'a, W> {
type Cursor = JsonCursor<'a, W>;
type Fields = JsonFields<'a, W>;
type Elements = JsonElements<'a, W>;
#[inline]
fn is_null(&self) -> bool {
matches!(self, StandardJson::Null)
}
fn as_bool(&self) -> Option<bool> {
match self {
StandardJson::Bool(b) => Some(*b),
_ => None,
}
}
// `None` also when the number does not parse as an `i64`.
fn as_i64(&self) -> Option<i64> {
match self {
StandardJson::Number(n) => n.as_i64().ok(),
_ => None,
}
}
// `None` also when the number does not parse as an `f64`.
fn as_f64(&self) -> Option<f64> {
match self {
StandardJson::Number(n) => n.as_f64().ok(),
_ => None,
}
}
// `None` also when the string's escapes or UTF-8 are invalid.
fn as_str(&self) -> Option<Cow<'_, str>> {
match self {
StandardJson::String(s) => s.as_str().ok(),
_ => None,
}
}
fn as_object(&self) -> Option<Self::Fields> {
match self {
StandardJson::Object(fields) => Some(*fields),
_ => None,
}
}
fn as_array(&self) -> Option<Self::Elements> {
match self {
StandardJson::Array(elements) => Some(*elements),
_ => None,
}
}
/// Returns a static, lower-case name for the variant.
fn type_name(&self) -> &'static str {
match self {
StandardJson::Null => "null",
StandardJson::Bool(_) => "boolean",
StandardJson::Number(_) => "number",
StandardJson::String(_) => "string",
StandardJson::Array(_) => "array",
StandardJson::Object(_) => "object",
StandardJson::Error(_) => "error",
}
}
fn is_error(&self) -> bool {
matches!(self, StandardJson::Error(_))
}
/// Returns the static message carried by an `Error` value.
fn error_message(&self) -> Option<&'static str> {
match self {
StandardJson::Error(msg) => Some(msg),
_ => None,
}
}
}
/// Adapts [`JsonFields`] to the generic `DocumentFields` interface by
/// forwarding to the inherent methods.
impl<'a, W: AsRef<[u64]> + Clone> DocumentFields for JsonFields<'a, W> {
type Value = StandardJson<'a, W>;
type Cursor = JsonCursor<'a, W>;
/// Splits off the first field, eagerly classifying its key and value.
fn uncons(&self) -> Option<(DocumentField<Self::Value, Self::Cursor>, Self)> {
let (field, rest) = JsonFields::uncons(self)?;
Some((
DocumentField {
key: field.key(),
value: field.value(),
value_cursor: field.value_cursor(),
},
rest,
))
}
fn find(&self, name: &str) -> Option<Self::Value> {
JsonFields::find(self, name)
}
fn is_empty(&self) -> bool {
JsonFields::is_empty(self)
}
}
/// Adapts [`JsonElements`] to the generic `DocumentElements` interface by
/// forwarding to the inherent methods.
impl<'a, W: AsRef<[u64]> + Clone> DocumentElements for JsonElements<'a, W> {
type Value = StandardJson<'a, W>;
type Cursor = JsonCursor<'a, W>;
fn uncons(&self) -> Option<(Self::Value, Self)> {
JsonElements::uncons(self)
}
fn uncons_cursor(&self) -> Option<(Self::Cursor, Self)> {
JsonElements::uncons_cursor(self)
}
// Uses the sibling-walking `get_fast` rather than the inherent `get`.
fn get(&self, index: usize) -> Option<Self::Value> {
JsonElements::get_fast(self, index)
}
fn is_empty(&self) -> bool {
JsonElements::is_empty(self)
}
}
/// Writes an object key as a YAML scalar.
///
/// String keys are decoded first (quotes removed, escapes resolved) so that
/// the YAML quoting rules see the real key text. A key that is not a string
/// is written as an empty double-quoted scalar.
fn write_json_yaml_key<W: AsRef<[u64]> + Clone, Out: core::fmt::Write>(
    out: &mut Out,
    key: StandardJson<'_, W>,
) -> core::fmt::Result {
    match key {
        StandardJson::String(k) => {
            let decoded = k.as_str().map_err(|_| core::fmt::Error)?;
            stream_json_string_as_yaml(out, &decoded)
        }
        _ => out.write_str("\"\""),
    }
}

/// Writes `value` to `out` as YAML.
///
/// * `current_indent` - column at which the current collection's entries start.
/// * `indent_spaces` - spaces added per nesting level; `0` selects flow style
///   (`[a, b]`, `{k: v}`) for the whole subtree.
///
/// Scalars are written inline. Empty arrays and objects are always written
/// as `[]` and `{}`. Numbers that parse as neither `i64` nor `f64`, and
/// `Error` values, are written as `null`.
///
/// String values and keys are decoded with `as_str()` before being written.
/// Passing the raw source bytes instead would include the surrounding JSON
/// quotes and undecoded escapes in the YAML scalar.
///
/// # Errors
///
/// Returns `core::fmt::Error` if `out` fails or a string cannot be decoded.
fn stream_json_as_yaml<'a, W: AsRef<[u64]> + Clone, Out: core::fmt::Write>(
    out: &mut Out,
    value: StandardJson<'a, W>,
    current_indent: usize,
    indent_spaces: usize,
) -> core::fmt::Result {
    match value {
        StandardJson::Null => out.write_str("null"),
        StandardJson::Bool(b) => out.write_str(if b { "true" } else { "false" }),
        StandardJson::Number(n) => {
            if let Ok(i) = n.as_i64() {
                write!(out, "{}", i)
            } else if let Ok(f) = n.as_f64() {
                if f.is_nan() {
                    out.write_str(".nan")
                } else if f.is_infinite() {
                    if f > 0.0 {
                        out.write_str(".inf")
                    } else {
                        out.write_str("-.inf")
                    }
                } else {
                    write!(out, "{}", f)
                }
            } else {
                out.write_str("null")
            }
        }
        StandardJson::String(s) => {
            let decoded = s.as_str().map_err(|_| core::fmt::Error)?;
            stream_json_string_as_yaml(out, &decoded)
        }
        StandardJson::Array(elements) => {
            if elements.is_empty() {
                return out.write_str("[]");
            }
            if indent_spaces == 0 {
                // Flow style: everything on one line.
                out.write_char('[')?;
                let mut first = true;
                for elem in elements {
                    if !first {
                        out.write_str(", ")?;
                    }
                    first = false;
                    stream_json_as_yaml(out, elem, 0, 0)?;
                }
                out.write_char(']')
            } else {
                // Block style: one "- " entry per line.
                let child_indent = current_indent + indent_spaces;
                let mut first = true;
                for elem in elements {
                    if !first {
                        out.write_char('\n')?;
                        write_json_yaml_indent(out, current_indent)?;
                    }
                    first = false;
                    out.write_str("- ")?;
                    // Non-empty containers start on their own, deeper line.
                    if is_json_container(&elem) {
                        out.write_char('\n')?;
                        write_json_yaml_indent(out, child_indent)?;
                    }
                    stream_json_as_yaml(out, elem, child_indent, indent_spaces)?;
                }
                Ok(())
            }
        }
        StandardJson::Object(fields) => {
            if fields.is_empty() {
                return out.write_str("{}");
            }
            if indent_spaces == 0 {
                // Flow style: everything on one line.
                out.write_char('{')?;
                let mut first = true;
                for field in fields {
                    if !first {
                        out.write_str(", ")?;
                    }
                    first = false;
                    write_json_yaml_key(out, field.key())?;
                    out.write_str(": ")?;
                    stream_json_as_yaml(out, field.value(), 0, 0)?;
                }
                out.write_char('}')
            } else {
                // Block style: one "key: value" entry per line.
                let child_indent = current_indent + indent_spaces;
                let mut first = true;
                for field in fields {
                    if !first {
                        out.write_char('\n')?;
                        write_json_yaml_indent(out, current_indent)?;
                    }
                    first = false;
                    write_json_yaml_key(out, field.key())?;
                    out.write_char(':')?;
                    let val = field.value();
                    if is_json_container(&val) {
                        // Non-empty containers start on their own, deeper line.
                        out.write_char('\n')?;
                        write_json_yaml_indent(out, child_indent)?;
                    } else {
                        out.write_char(' ')?;
                    }
                    stream_json_as_yaml(out, val, child_indent, indent_spaces)?;
                }
                Ok(())
            }
        }
        StandardJson::Error(_) => out.write_str("null"),
    }
}
/// Returns `true` for a non-empty array or object; scalars and empty
/// containers return `false`.
fn is_json_container<W: AsRef<[u64]> + Clone>(value: &StandardJson<'_, W>) -> bool {
    let is_empty = match value {
        StandardJson::Array(elements) => elements.is_empty(),
        StandardJson::Object(fields) => fields.is_empty(),
        _ => return false,
    };
    !is_empty
}
/// Writes `spaces` space characters to `out`, stopping at the first write error.
fn write_json_yaml_indent<Out: core::fmt::Write>(
    out: &mut Out,
    spaces: usize,
) -> core::fmt::Result {
    (0..spaces).try_for_each(|_| out.write_char(' '))
}
/// Writes `s` as a YAML scalar: `''` for the empty string, double-quoted
/// when the plain form would be ambiguous, and verbatim otherwise.
fn stream_json_string_as_yaml<Out: core::fmt::Write>(out: &mut Out, s: &str) -> core::fmt::Result {
    match s {
        "" => out.write_str("''"),
        _ if needs_json_yaml_quoting(s) => stream_json_yaml_double_quoted(out, s),
        _ => out.write_str(s),
    }
}
/// Decides whether `s` must be quoted to survive as a YAML string.
///
/// Quoting is required when `s` is empty, starts with a YAML indicator
/// character, starts or ends with a space, spells a reserved word
/// (case-insensitively), looks like a number, or contains a control
/// character, `:` or `#` anywhere.
fn needs_json_yaml_quoting(s: &str) -> bool {
    let bytes = s.as_bytes();
    let (Some(&first), Some(&last)) = (bytes.first(), bytes.last()) else {
        // Empty string.
        return true;
    };
    let starts_with_indicator = matches!(
        first,
        b'-' | b'?'
            | b':'
            | b','
            | b'['
            | b']'
            | b'{'
            | b'}'
            | b'#'
            | b'&'
            | b'*'
            | b'!'
            | b'|'
            | b'>'
            | b'\''
            | b'"'
            | b'%'
            | b'@'
            | b'`'
    );
    if starts_with_indicator || first == b' ' || last == b' ' {
        return true;
    }
    let is_reserved_word = matches!(
        s.to_lowercase().as_str(),
        "null" | "~" | "true" | "false" | "yes" | "no" | "on" | "off" | ".inf" | "-.inf" | ".nan"
    );
    if is_reserved_word || looks_like_json_yaml_number(s) {
        return true;
    }
    bytes
        .iter()
        .any(|&byte| byte < 0x20 || byte == b':' || byte == b'#')
}
/// Returns `true` when `s` could be read as a number by a YAML parser.
///
/// Accepted shape: optional sign, a leading digit, then digits with at most
/// one `.` (only before any exponent) and at most one `e`/`E`, which may be
/// followed by a sign. The check is deliberately loose: `"1."` and `"1e"`
/// both count as numbers.
fn looks_like_json_yaml_number(s: &str) -> bool {
    let bytes = s.as_bytes();
    // Skip one optional leading sign.
    let digits_start = usize::from(matches!(bytes.first(), Some(b'-' | b'+')));
    match bytes.get(digits_start) {
        Some(byte) if byte.is_ascii_digit() => {}
        _ => return false,
    }
    let mut seen_dot = false;
    let mut seen_exp = false;
    let mut rest = bytes[digits_start..].iter().peekable();
    while let Some(&byte) = rest.next() {
        match byte {
            b'0'..=b'9' => {}
            b'.' if !seen_dot && !seen_exp => seen_dot = true,
            b'e' | b'E' if !seen_exp => {
                seen_exp = true;
                // An exponent may carry its own sign.
                let _ = rest.next_if(|&&next| next == b'-' || next == b'+');
            }
            _ => return false,
        }
    }
    true
}
/// Writes `s` as a YAML double-quoted scalar.
///
/// `"` and `\` are backslash-escaped; newline, carriage return and tab use
/// their short escapes; any other character below U+0020 is written as
/// `\xNN` with two lower-case hex digits. Everything else is copied as is.
fn stream_json_yaml_double_quoted<Out: core::fmt::Write>(
    out: &mut Out,
    s: &str,
) -> core::fmt::Result {
    out.write_char('"')?;
    for ch in s.chars() {
        let short_escape = match ch {
            '"' => "\\\"",
            '\\' => "\\\\",
            '\n' => "\\n",
            '\r' => "\\r",
            '\t' => "\\t",
            control if (control as u32) < 0x20 => {
                write!(out, "\\x{:02x}", control as u32)?;
                continue;
            }
            plain => {
                out.write_char(plain)?;
                continue;
            }
        };
        out.write_str(short_escape)?;
    }
    out.write_char('"')
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_build_index() {
let json = br#"{"a": 1}"#;
let index = JsonIndex::build(json);
assert!(!index.bp().is_empty());
}
#[test]
fn test_root_cursor() {
let json = br#"{"a": 1}"#;
let index = JsonIndex::build(json);
let root = index.root(json);
assert_eq!(root.bp_position(), 0);
}
#[test]
fn test_empty_object() {
let json = br#"{}"#;
let index = JsonIndex::build(json);
let root = index.root(json);
match root.value() {
StandardJson::Object(fields) => {
assert!(fields.is_empty());
}
_ => panic!("expected object"),
}
}
#[test]
fn test_empty_array() {
let json = br#"[]"#;
let index = JsonIndex::build(json);
let root = index.root(json);
match root.value() {
StandardJson::Array(elements) => {
assert!(elements.is_empty());
}
_ => panic!("expected array"),
}
}
#[test]
fn test_simple_values() {
let json = b"true";
let index = JsonIndex::build(json);
let root = index.root(json);
assert!(matches!(root.value(), StandardJson::Bool(true)));
let json = b"false";
let index = JsonIndex::build(json);
let root = index.root(json);
assert!(matches!(root.value(), StandardJson::Bool(false)));
let json = b"null";
let index = JsonIndex::build(json);
let root = index.root(json);
assert!(matches!(root.value(), StandardJson::Null));
}
#[test]
fn test_number() {
let json = b"42";
let index = JsonIndex::build(json);
let root = index.root(json);
match root.value() {
StandardJson::Number(n) => {
assert_eq!(n.as_i64().unwrap(), 42);
}
_ => panic!("expected number"),
}
}
#[test]
fn test_string() {
let json = br#""hello""#;
let index = JsonIndex::build(json);
let root = index.root(json);
match root.value() {
StandardJson::String(s) => {
assert_eq!(s.as_str().unwrap(), "hello");
}
_ => panic!("expected string"),
}
}
#[test]
fn test_object_single_field() {
let json = br#"{"name": "Alice"}"#;
let index = JsonIndex::build(json);
let root = index.root(json);
match root.value() {
StandardJson::Object(fields) => {
assert!(!fields.is_empty());
let (field, rest) = fields.uncons().expect("should have one field");
match field.key() {
StandardJson::String(s) => {
assert_eq!(s.as_str().unwrap(), "name");
}
_ => panic!("expected string key"),
}
match field.value() {
StandardJson::String(s) => {
assert_eq!(s.as_str().unwrap(), "Alice");
}
_ => panic!("expected string value"),
}
assert!(rest.is_empty());
}
_ => panic!("expected object"),
}
}
#[test]
fn test_object_multiple_fields() {
let json = br#"{"name": "Bob", "age": 30}"#;
let index = JsonIndex::build(json);
let root = index.root(json);
match root.value() {
StandardJson::Object(fields) => {
let (field1, rest1) = fields.uncons().expect("should have first field");
match field1.key() {
StandardJson::String(s) => assert_eq!(s.as_str().unwrap(), "name"),
_ => panic!("expected string key"),
}
match field1.value() {
StandardJson::String(s) => assert_eq!(s.as_str().unwrap(), "Bob"),
_ => panic!("expected string value"),
}
let (field2, rest2) = rest1.uncons().expect("should have second field");
match field2.key() {
StandardJson::String(s) => assert_eq!(s.as_str().unwrap(), "age"),
_ => panic!("expected string key"),
}
match field2.value() {
StandardJson::Number(n) => assert_eq!(n.as_i64().unwrap(), 30),
_ => panic!("expected number value"),
}
assert!(rest2.is_empty());
}
_ => panic!("expected object"),
}
}
/// `find` looks fields up by name regardless of position and returns `None` for an absent key.
#[test]
fn test_object_find_field() {
    let json = br#"{"name": "Charlie", "age": 25, "city": "NYC"}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Object(fields) = root.value() else {
        panic!("expected object");
    };

    let Some(StandardJson::Number(age)) = fields.find("age") else {
        panic!("expected number");
    };
    assert_eq!(age.as_i64().unwrap(), 25);

    let Some(StandardJson::String(name)) = fields.find("name") else {
        panic!("expected string");
    };
    assert_eq!(name.as_str().unwrap(), "Charlie");

    let Some(StandardJson::String(city)) = fields.find("city") else {
        panic!("expected string");
    };
    assert_eq!(city.as_str().unwrap(), "NYC");

    assert!(fields.find("missing").is_none());
}
/// A one-element array yields its single number via `uncons` and an empty remainder.
#[test]
fn test_array_single_element() {
    let json = br#"[42]"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Array(elements) = root.value() else {
        panic!("expected array");
    };
    assert!(!elements.is_empty());

    let (head, tail) = elements.uncons().expect("should have one element");
    let StandardJson::Number(num) = head else {
        panic!("expected number");
    };
    assert_eq!(num.as_i64().unwrap(), 42);
    assert!(tail.is_empty());
}
/// Peels three elements off `[1, 2, 3]` with successive `uncons` calls and checks each value.
#[test]
fn test_array_multiple_elements() {
    let json = br#"[1, 2, 3]"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Array(elements) = root.value() else {
        panic!("expected array");
    };

    let (e1, rest1) = elements.uncons().expect("first");
    let (e2, rest2) = rest1.uncons().expect("second");
    let (e3, rest3) = rest2.uncons().expect("third");

    // Each peeled element must be a number equal to its 1-based position.
    for (elem, expected) in [(e1, 1), (e2, 2), (e3, 3)] {
        let StandardJson::Number(num) = elem else {
            panic!("expected number");
        };
        assert_eq!(num.as_i64().unwrap(), expected);
    }

    assert!(rest3.is_empty());
}
/// `get(i)` returns the i-th element for in-range indices and `None` one past the end.
#[test]
fn test_array_get() {
    let json = br#"["a", "b", "c"]"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Array(elements) = root.value() else {
        panic!("expected array");
    };

    let Some(StandardJson::String(zeroth)) = elements.get(0) else {
        panic!("expected string at index 0");
    };
    assert_eq!(zeroth.as_str().unwrap(), "a");

    let Some(StandardJson::String(first)) = elements.get(1) else {
        panic!("expected string at index 1");
    };
    assert_eq!(first.as_str().unwrap(), "b");

    let Some(StandardJson::String(second)) = elements.get(2) else {
        panic!("expected string at index 2");
    };
    assert_eq!(second.as_str().unwrap(), "c");

    assert!(elements.get(3).is_none());
}
/// Descends through an object nested inside an object using two chained `find` lookups.
#[test]
fn test_nested_object() {
    let json = br#"{"person": {"name": "Dave"}}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Object(outer) = root.value() else {
        panic!("expected object");
    };
    let Some(StandardJson::Object(inner)) = outer.find("person") else {
        panic!("expected nested object");
    };
    let Some(StandardJson::String(name)) = inner.find("name") else {
        panic!("expected string");
    };
    assert_eq!(name.as_str().unwrap(), "Dave");
}
/// Indexes into an array of objects and looks up one field in each element.
#[test]
fn test_array_of_objects() {
    let json = br#"[{"a": 1}, {"b": 2}]"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Array(elements) = root.value() else {
        panic!("expected array");
    };

    let Some(StandardJson::Object(first)) = elements.get(0) else {
        panic!("expected object");
    };
    let Some(StandardJson::Number(a)) = first.find("a") else {
        panic!("expected number");
    };
    assert_eq!(a.as_i64().unwrap(), 1);

    let Some(StandardJson::Object(second)) = elements.get(1) else {
        panic!("expected object");
    };
    let Some(StandardJson::Number(b)) = second.find("b") else {
        panic!("expected number");
    };
    assert_eq!(b.as_i64().unwrap(), 2);
}
/// A top-level negative integer parses through `as_i64` with its sign intact.
#[test]
fn test_negative_number() {
    let json = b"-123";
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Number(num) = root.value() else {
        panic!("expected number");
    };
    assert_eq!(num.as_i64().unwrap(), -123);
}
/// A top-level decimal number parses through `as_f64` to within a small tolerance.
#[test]
fn test_float_number() {
    let json = b"1.23456";
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Number(num) = root.value() else {
        panic!("expected number");
    };
    let parsed = num.as_f64().unwrap();
    let error = (parsed - 1.23456).abs();
    assert!(error < 0.0001);
}
/// `uncons` does not consume or advance the element view it is called on:
/// calling it twice on the same view yields a head both times, and the
/// remainder can be walked independently.
///
/// The test fails loudly if the root is not an array, so it can never pass
/// without exercising the assertions.
#[test]
fn test_immutable_iteration() {
    let json = br#"[1, 2, 3]"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Array(elements) = root.value() else {
        panic!("expected array");
    };

    let (e1, rest1) = elements.uncons().unwrap();
    assert!(matches!(e1, StandardJson::Number(_)));

    // Same view, second call: must still produce a head element.
    let (e1_again, _) = elements.uncons().unwrap();
    assert!(matches!(e1_again, StandardJson::Number(_)));

    let (e2, _) = rest1.uncons().unwrap();
    assert!(matches!(e2, StandardJson::Number(_)));
}
/// A string without escape sequences decodes to `Cow::Borrowed` rather than an owned copy.
#[test]
fn test_string_no_escapes_is_borrowed() {
    let json = br#""hello world""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert!(matches!(decoded, Cow::Borrowed(_)));
    assert_eq!(&*decoded, "hello world");
}
/// The `\"` escape decodes to a literal double quote.
#[test]
fn test_string_escaped_quote() {
    let json = br#""hello\"world""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "hello\"world");
}
/// The `\\` escape decodes to a single backslash.
#[test]
fn test_string_escaped_backslash() {
    let json = br#""hello\\world""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "hello\\world");
}
/// The `\/` escape decodes to a plain forward slash.
#[test]
fn test_string_escaped_slash() {
    let json = br#""hello\/world""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "hello/world");
}
/// The `\n` escape decodes to a line feed character.
#[test]
fn test_string_escaped_newline() {
    let json = br#""hello\nworld""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "hello\nworld");
}
/// The `\t` escape decodes to a horizontal tab character.
#[test]
fn test_string_escaped_tab() {
    let json = br#""hello\tworld""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "hello\tworld");
}
/// The `\r` escape decodes to a carriage return character.
#[test]
fn test_string_escaped_carriage_return() {
    let json = br#""hello\rworld""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "hello\rworld");
}
/// The `\b` escape decodes to U+0008 (backspace).
#[test]
fn test_string_escaped_backspace() {
    let json = br#""hello\bworld""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "hello\u{0008}world");
}
/// The `\f` escape decodes to U+000C (form feed).
#[test]
fn test_string_escaped_formfeed() {
    let json = br#""hello\fworld""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "hello\u{000C}world");
}
/// A `\uXXXX` escape for an ASCII code point (`\u0041`) decodes to that character.
#[test]
fn test_string_unicode_escape_bmp() {
    let json = br#""\u0041""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "A");
}
/// A `\uXXXX` escape with uppercase hex digits (`\u20AC`) decodes to the euro sign.
#[test]
fn test_string_unicode_escape_euro() {
    let json = br#""\u20AC""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "€");
}
/// A `\uXXXX` escape with lowercase hex digits (`\u00e9`) is accepted and decoded.
#[test]
fn test_string_unicode_escape_lowercase() {
    let json = br#""\u00e9""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "é");
}
/// A high/low surrogate pair (`\uD83D\uDE00`) is combined into one supplementary-plane character.
#[test]
fn test_string_unicode_surrogate_pair() {
    let json = br#""\uD83D\uDE00""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "😀");
}
/// Several different single-character escapes in one string are all decoded in place.
#[test]
fn test_string_multiple_escapes() {
    let json = br#""line1\nline2\ttab\r\n""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "line1\nline2\ttab\r\n");
}
/// `\uXXXX` escapes and single-character escapes can be mixed with plain text in one string.
#[test]
fn test_string_mixed_escapes_and_unicode() {
    let json = br#""Price: \u20AC100\nTax: \u00A310""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    let decoded = text.as_str().unwrap();
    assert_eq!(&*decoded, "Price: €100\nTax: £10");
}
/// An unrecognised escape character (`\x`) makes `as_str` return `JsonError::InvalidEscape`.
#[test]
fn test_string_invalid_escape() {
    let json = br#""\x""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    assert_eq!(text.as_str(), Err(JsonError::InvalidEscape));
}
/// A high surrogate with no following low surrogate is rejected as `InvalidUnicodeEscape`.
#[test]
fn test_string_lone_high_surrogate() {
    let json = br#""\uD83D""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    assert_eq!(text.as_str(), Err(JsonError::InvalidUnicodeEscape));
}
/// A low surrogate with no preceding high surrogate is rejected as `InvalidUnicodeEscape`.
#[test]
fn test_string_lone_low_surrogate() {
    let json = br#""\uDE00""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    assert_eq!(text.as_str(), Err(JsonError::InvalidUnicodeEscape));
}
/// Non-hex digits after `\u` are rejected as `InvalidUnicodeEscape`.
#[test]
fn test_string_invalid_unicode_hex() {
    let json = br#""\uXXXX""#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::String(text) = root.value() else {
        panic!("expected string");
    };
    assert_eq!(text.as_str(), Err(JsonError::InvalidUnicodeEscape));
}
/// Escape sequences inside an object key are decoded just like those in string values.
#[test]
fn test_string_with_escaped_key_in_object() {
    let json = br#"{"na\nme": "value"}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Object(fields) = root.value() else {
        panic!("expected object");
    };
    let (field, _) = fields.uncons().unwrap();
    let StandardJson::String(key) = field.key() else {
        panic!("expected string key");
    };
    assert_eq!(&*key.as_str().unwrap(), "na\nme");
}
/// The object field view can be driven as an iterator, yielding fields in document order.
#[test]
fn test_json_fields_iterator() {
    let json = br#"{"a": 1, "b": 2, "c": 3}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Object(fields) = root.value() else {
        panic!("expected object");
    };

    // Collect every key as an owned string so the list can be compared directly.
    let keys: Vec<_> = fields
        .map(|field| match field.key() {
            StandardJson::String(key) => key.as_str().unwrap().into_owned(),
            _ => panic!("expected string key"),
        })
        .collect();
    assert_eq!(keys, vec!["a", "b", "c"]);
}
/// The array element view can be driven as an iterator, yielding elements in document order.
#[test]
fn test_json_elements_iterator() {
    let json = br#"[1, 2, 3, 4, 5]"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Array(elements) = root.value() else {
        panic!("expected array");
    };

    // Non-numbers and unparsable numbers are dropped rather than failing the test.
    let nums: Vec<_> = elements
        .filter_map(|elem| match elem {
            StandardJson::Number(num) => num.as_i64().ok(),
            _ => None,
        })
        .collect();
    assert_eq!(nums, vec![1, 2, 3, 4, 5]);
}
/// Iterating the fields of `{}` yields nothing.
#[test]
fn test_iterator_empty_object() {
    let json = br#"{}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Object(fields) = root.value() else {
        panic!("expected object");
    };
    assert_eq!(fields.count(), 0);
}
/// Iterating the elements of `[]` yields nothing.
#[test]
fn test_iterator_empty_array() {
    let json = br#"[]"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let StandardJson::Array(elements) = root.value() else {
        panic!("expected array");
    };
    assert_eq!(elements.count(), 0);
}
/// Pins the user-facing `Display` text of each `JsonError` variant checked here.
#[test]
fn test_json_error_display() {
    use std::string::ToString;

    let expectations = [
        (JsonError::InvalidUtf8, "invalid UTF-8 in string"),
        (JsonError::InvalidNumber, "invalid number format"),
        (JsonError::InvalidEscape, "invalid escape sequence in string"),
        (
            JsonError::InvalidUnicodeEscape,
            "invalid unicode escape sequence",
        ),
    ];
    for (error, message) in expectations.iter() {
        assert_eq!(error.to_string(), *message);
    }
}
/// A non-empty object root reports `is_container() == true`.
#[test]
fn test_is_container_object() {
    let json = br#"{"a": 1}"#;
    let index = JsonIndex::build(json);
    assert!(index.root(json).is_container());
}
/// A non-empty array root reports `is_container() == true`.
#[test]
fn test_is_container_array() {
    let json = br#"[1, 2, 3]"#;
    let index = JsonIndex::build(json);
    assert!(index.root(json).is_container());
}
/// Pins the current behaviour that an empty object root reports `is_container() == false`.
#[test]
fn test_is_container_empty_object() {
    let json = br#"{}"#;
    let index = JsonIndex::build(json);
    let is_container = index.root(json).is_container();
    assert!(!is_container);
}
/// Pins the current behaviour that an empty array root reports `is_container() == false`.
#[test]
fn test_is_container_empty_array() {
    let json = br#"[]"#;
    let index = JsonIndex::build(json);
    let is_container = index.root(json).is_container();
    assert!(!is_container);
}
/// Scalar roots (string, number, `true`, `null`) all report `is_container() == false`.
#[test]
fn test_is_container_leaf_values() {
    // Each document is indexed on its own; the order matches the original checks.
    let leaves: [&[u8]; 4] = [br#""hello""#, b"42", b"true", b"null"];
    for &json in &leaves {
        let index = JsonIndex::build(json);
        assert!(!index.root(json).is_container());
    }
}
/// `children()` on a three-element array root yields three cursors.
#[test]
fn test_children_array() {
    let json = br#"[1, 2, 3]"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    assert_eq!(root.children().count(), 3);
}
/// `children()` on a two-field object root yields four cursors.
#[test]
fn test_children_object() {
    let json = br#"{"a": 1, "b": 2}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    // NOTE(review): four children for two fields suggests keys and values are
    // each counted as a child — confirm against the cursor implementation.
    assert_eq!(root.children().count(), 4);
}
/// Direct children of `{"arr": [1, 2]}` number two; the second is a container with two children.
#[test]
fn test_children_nested() {
    let json = br#"{"arr": [1, 2]}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);

    let top_level: Vec<_> = root.children().collect();
    assert_eq!(top_level.len(), 2);

    // The cursor at position 1 is the one expected to point at the nested array.
    let nested = top_level[1];
    assert!(nested.is_container());
    assert_eq!(nested.children().count(), 2);
}
/// `children()` on an empty array root yields no cursors.
#[test]
fn test_children_empty() {
    let json = br#"[]"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let child_total = root.children().count();
    assert_eq!(child_total, 0);
}
/// Recursively visiting `children()` from the root reaches nine nodes in total for this document.
#[test]
fn test_children_recursive_count() {
    /// Counts `cursor` itself plus every node reachable through `children()`.
    fn count_all(cursor: super::JsonCursor) -> usize {
        cursor
            .children()
            .fold(1, |total, child| total + count_all(child))
    }

    let json = br#"{"a": [1, 2], "b": {"c": 3}}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    assert_eq!(count_all(root), 9);
}
/// With no line breaks, every offset is on line 1 and line 2 does not exist.
#[test]
fn test_newline_index_single_line() {
    let json = br#"{"name": "Alice"}"#;
    let index = JsonIndex::build(json);

    // offset -> (line, column); both are 1-based in the expected values.
    for (offset, line_col) in [(0, (1, 1)), (8, (1, 9)), (16, (1, 17))] {
        assert_eq!(index.to_line_column(offset), line_col);
    }

    assert_eq!(index.to_offset(1, 1), Some(0));
    assert_eq!(index.to_offset(1, 9), Some(8));
    assert_eq!(index.to_offset(2, 1), None);
}
/// Offsets in a three-line, LF-terminated document map to the right (line, column) and back.
///
/// The middle line is indented with exactly two spaces; the asserted offsets
/// depend on that layout (line 2 starts at byte 2, the closing brace is byte 20).
#[test]
fn test_newline_index_multi_line() {
    let json = b"{\n  \"name\": \"Alice\"\n}";
    let index = JsonIndex::build(json);

    assert_eq!(index.to_line_column(0), (1, 1));
    assert_eq!(index.to_line_column(1), (1, 2));
    // Byte 2 is the first byte after the first '\n', i.e. the start of line 2.
    assert_eq!(index.to_line_column(2), (2, 1));
    assert_eq!(index.to_line_column(4), (2, 3));
    // Byte 20 is the closing brace, alone on line 3.
    assert_eq!(index.to_line_column(20), (3, 1));

    assert_eq!(index.to_offset(1, 1), Some(0));
    assert_eq!(index.to_offset(2, 1), Some(2));
    assert_eq!(index.to_offset(3, 1), Some(20));
}
/// Line/column mapping across a five-line pretty-printed array.
///
/// Each element line is indented with exactly two spaces; the asserted
/// offsets depend on that layout (lines start at bytes 0, 2, 7, 12 and 16).
#[test]
fn test_newline_index_array() {
    let json = b"[\n  1,\n  2,\n  3\n]";
    let index = JsonIndex::build(json);

    assert_eq!(index.to_line_column(0), (1, 1));
    assert_eq!(index.to_line_column(2), (2, 1));
    // Byte 5 is the comma after `1`: two spaces + digit put it in column 4.
    assert_eq!(index.to_line_column(5), (2, 4));
    assert_eq!(index.to_line_column(7), (3, 1));
    assert_eq!(index.to_line_column(12), (4, 1));
    assert_eq!(index.to_line_column(16), (5, 1));

    assert_eq!(index.to_offset(1, 1), Some(0));
    assert_eq!(index.to_offset(2, 1), Some(2));
    assert_eq!(index.to_offset(3, 1), Some(7));
    assert_eq!(index.to_offset(5, 1), Some(16));
}
/// A `\r\n` pair counts as a single line break: the next line starts after the `\n`.
#[test]
fn test_newline_index_crlf() {
    let json = b"{\r\n\"a\": 1\r\n}";
    let index = JsonIndex::build(json);

    // Forward mapping: byte offset -> (line, column).
    for (offset, line_col) in [(0, (1, 1)), (3, (2, 1)), (11, (3, 1))] {
        assert_eq!(index.to_line_column(offset), line_col);
    }

    // Reverse mapping: start of lines 2 and 3 -> byte offset.
    assert_eq!(index.to_offset(2, 1), Some(3));
    assert_eq!(index.to_offset(3, 1), Some(11));
}
/// `to_offset` returns `None` when given line 0 or column 0.
#[test]
fn test_newline_index_invalid_inputs() {
    let json = b"{\n\"a\": 1\n}";
    let index = JsonIndex::build(json);

    let line_zero = index.to_offset(0, 1);
    assert_eq!(line_zero, None);

    let column_zero = index.to_offset(1, 0);
    assert_eq!(column_zero, None);
}
/// For every byte offset in a multi-line document, `to_offset(to_line_column(o))` returns `o`.
#[test]
fn test_newline_index_round_trip() {
    let json =
        b"{\n \"users\": [\n {\"name\": \"Alice\"},\n {\"name\": \"Bob\"}\n ]\n}";
    let index = JsonIndex::build(json);

    (0..json.len()).for_each(|offset| {
        let (line, col) = index.to_line_column(offset);
        assert_eq!(
            index.to_offset(line, col),
            Some(offset),
            "Round-trip failed for offset {}",
            offset
        );
    });
}
/// `text_range` of a field whose value is a non-empty object spans exactly that object's bytes.
#[test]
fn test_text_range_nested_object_value() {
    let json = br#"{"key": {"key2": "value"}}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let fields = root.value().as_object().unwrap();
    let (field, _) = fields.uncons().unwrap();

    let range = field.value_cursor().text_range().unwrap();
    assert_eq!(range, (8, 25));
    // The range is half-open: slicing with it must reproduce the nested object.
    assert_eq!(&json[range.0..range.1], br#"{"key2": "value"}"#);
}
/// `text_range` of a field whose value is `{}` covers both braces.
#[test]
fn test_text_range_empty_object_value() {
    let json = br#"{"key": {}}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let fields = root.value().as_object().unwrap();
    let (field, _) = fields.uncons().unwrap();

    let (start, end) = field.value_cursor().text_range().unwrap();
    assert_eq!((start, end), (8, 10));
    assert_eq!(&json[start..end], b"{}");
}
/// `text_range` of a field whose value is `[]` covers both brackets.
#[test]
fn test_text_range_empty_array_value() {
    let json = br#"{"list": []}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let fields = root.value().as_object().unwrap();
    let (field, _) = fields.uncons().unwrap();

    let (start, end) = field.value_cursor().text_range().unwrap();
    assert_eq!((start, end), (9, 11));
    assert_eq!(&json[start..end], b"[]");
}
/// `text_range` of a field whose value is a non-empty array spans the whole array literal.
#[test]
fn test_text_range_array_value() {
    let json = br#"{"items": [1, 2, 3]}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let fields = root.value().as_object().unwrap();
    let (field, _) = fields.uncons().unwrap();

    let (start, end) = field.value_cursor().text_range().unwrap();
    assert_eq!((start, end), (10, 19));
    assert_eq!(&json[start..end], b"[1, 2, 3]");
}
/// `text_range` of the second field's string value includes its surrounding quotes.
#[test]
fn test_text_range_second_field() {
    let json = br#"{"a": 1, "b": "hello"}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);
    let fields = root.value().as_object().unwrap();

    // Skip field "a" to reach field "b".
    let (_, after_a) = fields.uncons().unwrap();
    let (field_b, _) = after_a.uncons().unwrap();

    let (start, end) = field_b.value_cursor().text_range().unwrap();
    assert_eq!((start, end), (14, 21));
    assert_eq!(&json[start..end], br#""hello""#);
}
/// `text_range` stays accurate at each of three nesting levels of objects.
#[test]
fn test_text_range_deeply_nested() {
    let json = br#"{"a": {"b": {"c": 1}}}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);

    // Level 1: value of "a".
    let level_one = root.value().as_object().unwrap();
    let (field_a, _) = level_one.uncons().unwrap();
    let range_a = field_a.value_cursor().text_range().unwrap();
    assert_eq!(range_a, (6, 21));
    assert_eq!(&json[6..21], br#"{"b": {"c": 1}}"#);

    // Level 2: value of "b".
    let level_two = field_a.value().as_object().unwrap();
    let (field_b, _) = level_two.uncons().unwrap();
    let range_b = field_b.value_cursor().text_range().unwrap();
    assert_eq!(range_b, (12, 20));
    assert_eq!(&json[12..20], br#"{"c": 1}"#);

    // Level 3: value of "c", a single digit.
    let level_three = field_b.value().as_object().unwrap();
    let (field_c, _) = level_three.uncons().unwrap();
    let range_c = field_c.value_cursor().text_range().unwrap();
    assert_eq!(range_c, (18, 19));
    assert_eq!(&json[18..19], b"1");
}
/// `text_range` of an object root covers the entire document.
#[test]
fn test_text_range_root_object() {
    let json = br#"{"a": 1}"#;
    let index = JsonIndex::build(json);
    let root = index.root(json);

    let (start, end) = root.text_range().unwrap();
    assert_eq!((start, end), (0, 8));
    assert_eq!(&json[start..end], br#"{"a": 1}"#);
}
/// `text_range` of an array root covers the entire document.
#[test]
fn test_text_range_root_array() {
    let json = b"[1, 2, 3]";
    let index = JsonIndex::build(json);
    let root = index.root(json);

    let (start, end) = root.text_range().unwrap();
    assert_eq!((start, end), (0, 9));
    assert_eq!(&json[start..end], b"[1, 2, 3]");
}
}