pub mod borrowed;
pub mod buffered;
pub mod error;
pub mod owned;
pub use borrowed::BorrowedBytes;
pub use buffered::BufferedInput;
pub use owned::OwnedBytes;
pub mod mmap;
pub use mmap::MmapInput;
use self::error::InputError;
use crate::{query::JsonString, result::InputRecorder, FallibleIterator};
use std::ops::Deref;
/// Wraps a single item and attaches `#[repr(C, align(128))]` to it.
///
/// The alignment has to be written as a literal because `repr(align(..))` does
/// not accept a path to a constant. The literal equals the value of
/// `MAX_BLOCK_SIZE` declared in this module.
/// NOTE(review): presumably the two numbers are meant to stay equal — confirm
/// before changing either one.
macro_rules! repr_align_block_size {
    ($wrapped:item) => {
        #[repr(C, align(128))]
        $wrapped
    };
}
pub(crate) use repr_align_block_size;
/// Length, in bytes, of the zero-padded buffer held by `LastBlock`, and the chunk
/// size `in_slice::pad_last_block` uses to locate the trailing partial block.
///
/// NOTE(review): the name suggests this is also the upper bound for the `N`
/// parameter of block iterators, and `repr_align_block_size!` hard-codes the same
/// number (128) as its alignment — confirm both are meant to stay in sync.
pub const MAX_BLOCK_SIZE: usize = 128;
/// Abstraction over a source of bytes that can be iterated in fixed-size blocks
/// and searched by position.
///
/// NOTE(review): no implementor is visible in this file. The per-method notes
/// below state what the signatures establish and point at the same-named helper
/// in the `in_slice` module where one exists; presumably the types re-exported
/// from `borrowed`, `buffered`, `owned` and `mmap` implement this trait —
/// confirm the contract against them.
pub trait Input: Sized {
/// Iterator type returned by [`Input::iter_blocks`].
///
/// It yields `Self::Block<'i, N>` values and is parameterised by the recorder
/// type `R`, which must implement `InputRecorder` for that block type.
type BlockIterator<'i, 'r, const N: usize, R>: InputBlockIterator<'i, N, Block = Self::Block<'i, N>>
where
Self: 'i,
R: InputRecorder<Self::Block<'i, N>> + 'r;
/// Type of a single block of size parameter `N` borrowed from the input.
type Block<'i, const N: usize>: InputBlock<'i, N>
where
Self: 'i;
/// Creates a block iterator over this input, handing it a reference to
/// `recorder`.
///
/// NOTE(review): how (or whether) the iterator reports blocks to the recorder
/// is decided by the implementor and is not visible here.
#[must_use]
fn iter_blocks<'i, 'r, R, const N: usize>(&'i self, recorder: &'r R) -> Self::BlockIterator<'i, 'r, N, R>
where
R: InputRecorder<Self::Block<'i, N>>;
/// Looks for `needle` starting at index `from`; returns an index or `None`.
///
/// NOTE(review): presumably searches towards the start of the input and
/// includes `from` itself, like `in_slice::seek_backward` (which panics when
/// `from` is out of bounds) — confirm for each implementor.
#[must_use]
fn seek_backward(&self, from: usize, needle: u8) -> Option<usize>;
/// Looks for any of the `N` bytes in `needles` starting at index `from`.
///
/// On success the result carries an index together with a byte.
/// NOTE(review): presumably the position of the first match at or after
/// `from` and the needle found there, like `in_slice::seek_forward` — confirm.
///
/// # Errors
/// Returns an [`InputError`]; the conditions depend on the implementor.
fn seek_forward<const N: usize>(&self, from: usize, needles: [u8; N]) -> Result<Option<(usize, u8)>, InputError>;
/// Looks for a non-whitespace byte starting at index `from`.
///
/// NOTE(review): presumably scans forward and uses ASCII whitespace, like
/// `in_slice::seek_non_whitespace_forward` — confirm.
///
/// # Errors
/// Returns an [`InputError`]; the conditions depend on the implementor.
fn seek_non_whitespace_forward(&self, from: usize) -> Result<Option<(usize, u8)>, InputError>;
/// Backward counterpart of [`Input::seek_non_whitespace_forward`]; this variant
/// is infallible and returns the `Option` directly.
///
/// NOTE(review): presumably mirrors `in_slice::seek_non_whitespace_backward`
/// — confirm.
#[must_use]
fn seek_non_whitespace_backward(&self, from: usize) -> Option<(usize, u8)>;
/// Searches for `member` starting at index `from`; returns an index or `None`.
///
/// NOTE(review): presumably the index of the opening quote of an unescaped
/// occurrence, like `in_slice::find_member` — confirm.
///
/// # Errors
/// Returns an [`InputError`]; the conditions depend on the implementor.
fn find_member(&self, from: usize, member: &JsonString) -> Result<Option<usize>, InputError>;
/// Decides whether the bytes delimited by `from` and `to` are the member name
/// `member`.
///
/// NOTE(review): `in_slice::is_member_match` treats `to` as inclusive and
/// expects the range to cover both quotes — confirm that implementors agree.
#[must_use]
fn is_member_match(&self, from: usize, to: usize, member: &JsonString) -> bool;
}
/// Fallible iterator over the blocks of an input.
///
/// Every item is a `Self::Block`, and iteration can fail with an [`InputError`].
pub trait InputBlockIterator<'i, const N: usize>: FallibleIterator<Item = Self::Block, Error = InputError> {
/// Type of the blocks produced by this iterator.
type Block: InputBlock<'i, N>;
/// Returns the iterator's current offset.
///
/// NOTE(review): presumably a byte index into the underlying input — the
/// unit and the exact reference point (next block vs. last block) are not
/// visible here; confirm against the implementors.
fn get_offset(&self) -> usize;
/// Moves the iterator by `count`.
///
/// NOTE(review): `count` is signed, so presumably both directions are
/// allowed; whether it is measured in blocks or in bytes is not visible here.
fn offset(&mut self, count: isize);
}
/// A block of input bytes with size parameter `N`, readable as a `[u8]` through
/// `Deref`.
pub trait InputBlock<'i, const N: usize>: Deref<Target = [u8]> {
/// Splits the block into two byte slices.
///
/// The implementation for `&[u8]` in this file cuts at index `N / 2` after
/// asserting that `N` is even.
fn halves(&self) -> (&[u8], &[u8]);
}
/// Lets a plain byte slice act as a block for every size parameter `N`.
impl<'i, const N: usize> InputBlock<'i, N> for &'i [u8] {
    /// Cuts the slice at index `N / 2`.
    ///
    /// The first part holds exactly `N / 2` bytes; the second part holds
    /// everything that follows, however long the slice is.
    ///
    /// # Panics
    /// If `N` is odd, or if the slice holds fewer than `N / 2` bytes.
    #[inline(always)]
    fn halves(&self) -> (&[u8], &[u8]) {
        assert_eq!(N % 2, 0);
        self.split_at(N / 2)
    }
}
/// The final, possibly partial, `MAX_BLOCK_SIZE`-sized chunk of an input, copied
/// out and zero-padded; built by `in_slice::pad_last_block`.
struct LastBlock {
/// Trailing input bytes followed by zero padding up to `MAX_BLOCK_SIZE`.
bytes: [u8; MAX_BLOCK_SIZE],
/// Index in the original input at which `bytes[0]` is located.
absolute_start: usize,
}
pub(super) mod in_slice {
use super::{LastBlock, MAX_BLOCK_SIZE};
use crate::query::JsonString;
/// Copies the trailing partial block of `bytes` into a zero-padded buffer.
///
/// The input is viewed as consecutive chunks of `MAX_BLOCK_SIZE` bytes. Whatever
/// remains after the last full chunk is placed at the front of a fresh
/// `MAX_BLOCK_SIZE`-byte array whose other bytes stay zero. When the input
/// length is a multiple of `MAX_BLOCK_SIZE` (including zero) nothing remains
/// and the buffer is all zeroes.
///
/// The returned `absolute_start` is the index in `bytes` at which the copied
/// remainder begins.
#[inline]
pub(super) fn pad_last_block(bytes: &[u8]) -> LastBlock {
    let tail = bytes.chunks_exact(MAX_BLOCK_SIZE).remainder();

    let mut padded = [0; MAX_BLOCK_SIZE];
    padded[..tail.len()].copy_from_slice(tail);

    LastBlock {
        bytes: padded,
        absolute_start: bytes.len() - tail.len(),
    }
}
/// Finds the closest occurrence of `needle` at or before index `from`.
///
/// Returns the index of that occurrence, or `None` when `needle` does not
/// appear anywhere in `bytes[..=from]`.
///
/// # Panics
/// If `from` is not a valid index into `bytes` (which includes every call on an
/// empty slice).
#[inline]
pub(super) fn seek_backward(bytes: &[u8], from: usize, needle: u8) -> Option<usize> {
    let idx = from;
    assert!(idx < bytes.len());

    // `rposition` walks from the back, so the hit nearest to `idx` wins.
    bytes[..=idx].iter().rposition(|&byte| byte == needle)
}
/// Finds the first byte at or after index `from` that equals one of `needles`.
///
/// Returns the index of the hit together with the byte found there, or `None`
/// when there is no hit or `from` lies at or past the end of `bytes`.
///
/// # Panics
/// If `needles` is empty (`N == 0`).
#[inline]
pub(super) fn seek_forward<const N: usize>(bytes: &[u8], from: usize, needles: [u8; N]) -> Option<(usize, u8)> {
    assert!(N > 0);

    // A start beyond the end yields `None` here instead of a slicing panic.
    let tail = bytes.get(from..)?;
    let offset = tail.iter().position(|byte| needles.contains(byte))?;

    Some((from + offset, tail[offset]))
}
/// Finds the first byte at or after index `from` that is not ASCII whitespace.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts. Returns the index
/// of the hit together with the byte found there, or `None` when only
/// whitespace remains or `from` lies at or past the end of `bytes`.
#[inline]
pub(super) fn seek_non_whitespace_forward(bytes: &[u8], from: usize) -> Option<(usize, u8)> {
    bytes
        .iter()
        .enumerate()
        .skip(from)
        .find(|(_, byte)| !byte.is_ascii_whitespace())
        .map(|(position, &byte)| (position, byte))
}
/// Finds the closest byte at or before index `from` that is not ASCII
/// whitespace.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts. Returns the index
/// of the hit together with the byte found there, or `None` when
/// `bytes[..=from]` is all whitespace or `from` is not a valid index.
#[inline]
pub(super) fn seek_non_whitespace_backward(bytes: &[u8], from: usize) -> Option<(usize, u8)> {
    // Checked slicing turns an out-of-range start into `None`.
    let head = bytes.get(..=from)?;
    let position = head.iter().rposition(|byte| !byte.is_ascii_whitespace())?;

    Some((position, head[position]))
}
/// Finds the first unescaped occurrence of `member` (quotes included) at or
/// after index `from`.
///
/// Returns the index of the opening quote of that occurrence, or `None` when
/// there is none or `from` lies at or past the end of `bytes`.
///
/// An occurrence counts as escaped when the byte directly before its opening
/// quote is a backslash. An occurrence at index 0 has no preceding byte and is
/// therefore never escaped — the same rule `is_member_match` applies.
///
/// NOTE(review): only that single preceding byte is inspected, so an opening
/// quote that follows an escaped backslash (`\\"`) is also rejected — confirm
/// whether callers need that case handled.
#[inline]
pub(super) fn find_member(bytes: &[u8], from: usize, member: &JsonString) -> Option<usize> {
    use memchr::memmem;

    let needle = member.bytes_with_quotes();
    let finder = memmem::Finder::new(needle);
    let mut idx = from;

    // Re-checking the bound on every round matters: the skip below can move
    // `idx` past the end, and slicing from there would panic.
    while idx < bytes.len() {
        let starting_quote_idx = idx + finder.find(&bytes[idx..])?;

        // Guard index 0 before looking one byte back to avoid underflow.
        let is_escaped = starting_quote_idx > 0 && bytes[starting_quote_idx - 1] == b'\\';
        if !is_escaped {
            return Some(starting_quote_idx);
        }

        // Resume after the rejected occurrence, skipping one further byte
        // exactly as before.
        idx = starting_quote_idx + needle.len() + 1;
    }

    None
}
/// Checks whether `bytes[from..=to]` is exactly `member` (quotes included) and
/// is not preceded by a backslash.
///
/// Returns `false` whenever the inclusive range `from..=to` does not fit in
/// `bytes`: `to` past the end, or `from` greater than `to + 1`. The latter used
/// to panic on slicing, while an out-of-range `to` already produced `false`;
/// both cases now behave the same way.
///
/// A candidate starting at index 0 has no preceding byte and is never
/// considered escaped.
#[inline]
pub(super) fn is_member_match(bytes: &[u8], from: usize, to: usize, member: &JsonString) -> bool {
    match bytes.get(from..=to) {
        Some(candidate) => {
            // `from - 1` is in bounds here: `get` succeeded, so `from <= to + 1 <= len`.
            let is_escaped = from > 0 && bytes[from - 1] == b'\\';
            member.bytes_with_quotes() == candidate && !is_escaped
        }
        None => false,
    }
}
}
#[cfg(test)]
mod tests {
use super::{in_slice, MAX_BLOCK_SIZE};
mod input_block_impl_for_slice {
use pretty_assertions::assert_eq;
#[test]
fn halves_splits_in_half() {
use super::super::InputBlock;
let bytes = r#"0123456789abcdef"#.as_bytes();
let (half1, half2) = <&[u8] as InputBlock<16>>::halves(&bytes);
assert_eq!(half1, "01234567".as_bytes());
assert_eq!(half2, "89abcdef".as_bytes());
}
}
/// Tests of `in_slice::pad_last_block`.
mod pad_last_block {
    use super::*;
    use pretty_assertions::assert_eq;

    /// With no input the block starts at offset 0 and is nothing but padding.
    #[test]
    fn on_empty_bytes_is_all_zero() {
        let padded = in_slice::pad_last_block(&[]);

        assert_eq!(padded.absolute_start, 0);
        assert_eq!(padded.bytes, [0; MAX_BLOCK_SIZE]);
    }

    /// Input shorter than one block is copied whole and followed by zeroes.
    #[test]
    fn on_bytes_smaller_than_full_block_gives_entire_block() {
        let input = r#"{"test":42}"#.as_bytes();

        let padded = in_slice::pad_last_block(input);
        let (copied, padding) = padded.bytes.split_at(11);

        assert_eq!(padded.absolute_start, 0);
        assert_eq!(copied, input);
        assert_eq!(padding, [0; MAX_BLOCK_SIZE - 11]);
    }

    /// Input of exactly one block leaves no remainder, so the result is empty
    /// padding positioned right after that block.
    #[test]
    fn on_bytes_equal_to_full_block_gives_all_zero() {
        let input = [42; MAX_BLOCK_SIZE];

        let padded = in_slice::pad_last_block(&input);

        assert_eq!(padded.absolute_start, MAX_BLOCK_SIZE);
        assert_eq!(padded.bytes, [0; MAX_BLOCK_SIZE]);
    }

    /// Two full blocks plus 77 bytes: only the 77-byte remainder is copied.
    #[test]
    fn on_bytes_longer_than_full_block_gives_last_fragment_padded() {
        const TAIL_LEN: usize = 77;
        let mut input = [42; 2 * MAX_BLOCK_SIZE + TAIL_LEN];
        input[2 * MAX_BLOCK_SIZE..].fill(69);

        let padded = in_slice::pad_last_block(&input);

        assert_eq!(padded.absolute_start, 2 * MAX_BLOCK_SIZE);
        assert_eq!(padded.bytes[..TAIL_LEN], [69; TAIL_LEN]);
        assert_eq!(padded.bytes[TAIL_LEN..], [0; MAX_BLOCK_SIZE - TAIL_LEN]);
    }
}
/// Tests of `in_slice::seek_backward`.
mod seek_backward {
    use super::*;
    use pretty_assertions::assert_eq;

    /// The only colon sits at index 7, after the starting point 6.
    #[test]
    fn seeking_from_before_first_occurrence_returns_none() {
        let haystack = br#"{"seek":42}"#;

        assert_eq!(in_slice::seek_backward(haystack, 6, b':'), None);
    }

    /// Starting from the last byte, the colon nearest to the end (index 17) wins.
    #[test]
    fn seeking_from_after_two_occurrences_returns_the_second_one() {
        let haystack = br#"{"seek":42,"find":37}"#;
        let last_idx = haystack.len() - 1;

        assert_eq!(in_slice::seek_backward(haystack, last_idx, b':'), Some(17));
    }
}
/// Tests of `in_slice::seek_forward` with a single needle.
mod seek_forward_1 {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Input shared by the tests that start on whitespace at index 5.
    const SPACED: &[u8] = b"seek: \t\n42}";

    /// Nothing can be found in an empty input.
    #[test]
    fn in_empty_slice_returns_none() {
        assert_eq!(in_slice::seek_forward(&[], 0, [0]), None);
    }

    /// The starting index itself is part of the search.
    #[test]
    fn seeking_from_needle_returns_that() {
        let haystack = br#"{"seek": 42}"#;

        assert_eq!(in_slice::seek_forward(haystack, 7, [b':']), Some((7, b':')));
    }

    /// Bytes that are not the needle are skipped until the needle shows up.
    #[test]
    fn seeking_from_not_needle_returns_next_needle() {
        assert_eq!(in_slice::seek_forward(SPACED, 5, [b'2']), Some((9, b'2')));
    }

    /// Running off the end without a hit yields `None`.
    #[test]
    fn seeking_from_not_needle_when_there_is_no_needle_returns_none() {
        assert_eq!(in_slice::seek_forward(SPACED, 5, [b'3']), None);
    }
}
/// Tests of `in_slice::seek_forward` with two needles.
mod seek_forward_2 {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Input whose index 7 is a colon.
    const COMPACT: &[u8] = br#"{"seek": 42}"#;
    /// Input whose indices 5..=7 are whitespace, followed by `4` and `2`.
    const SPACED: &[u8] = b"seek: \t\n42}";

    /// Nothing can be found in an empty input.
    #[test]
    fn in_empty_slice_returns_none() {
        assert_eq!(in_slice::seek_forward(&[], 0, [0, 1]), None);
    }

    /// The byte under the starting index matches the first needle.
    #[test]
    fn seeking_from_needle_1_returns_that() {
        let found = in_slice::seek_forward(COMPACT, 7, [b':', b'4']);

        assert_eq!(found, Some((7, b':')));
    }

    /// The byte under the starting index matches the second needle.
    #[test]
    fn seeking_from_needle_2_returns_that() {
        let found = in_slice::seek_forward(COMPACT, 7, [b'4', b':']);

        assert_eq!(found, Some((7, b':')));
    }

    /// The earliest hit in the input is the first needle.
    #[test]
    fn seeking_from_not_needle_when_next_is_needle_1_returns_that() {
        let found = in_slice::seek_forward(SPACED, 5, [b'4', b'2']);

        assert_eq!(found, Some((8, b'4')));
    }

    /// The earliest hit in the input is the second needle; needle order in the
    /// array does not affect which position is reported.
    #[test]
    fn seeking_from_not_needle_when_next_is_needle_2_returns_that() {
        let found = in_slice::seek_forward(SPACED, 5, [b'2', b'4']);

        assert_eq!(found, Some((8, b'4')));
    }

    /// Neither needle occurs after the starting index.
    #[test]
    fn seeking_from_not_needle_when_there_is_no_needle_returns_none() {
        let found = in_slice::seek_forward(SPACED, 5, [b'3', b'0']);

        assert_eq!(found, None);
    }
}
/// Tests of `in_slice::seek_non_whitespace_forward`.
mod seek_non_whitespace_forward {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Nothing can be found in an empty input.
    #[test]
    fn in_empty_slice_returns_none() {
        assert_eq!(in_slice::seek_non_whitespace_forward(&[], 0), None);
    }

    /// The starting index itself is part of the search.
    #[test]
    fn seeking_from_non_whitespace_returns_that() {
        let haystack = br#"{"seek": 42}"#;

        assert_eq!(in_slice::seek_non_whitespace_forward(haystack, 7), Some((7, b':')));
    }

    /// Space, tab and newline are all skipped.
    #[test]
    fn seeking_from_whitespace_returns_next_non_whitespace() {
        let haystack = b"seek: \t\n42}";

        assert_eq!(in_slice::seek_non_whitespace_forward(haystack, 5), Some((8, b'4')));
    }

    /// Only whitespace remains after the starting index.
    #[test]
    fn seeking_from_whitespace_when_there_is_no_more_non_whitespace_returns_none() {
        let haystack = b"seek: \t\n ";

        assert_eq!(in_slice::seek_non_whitespace_forward(haystack, 5), None);
    }
}
/// Tests of `in_slice::seek_non_whitespace_backward`.
mod seek_non_whitespace_backward {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Nothing can be found in an empty input.
    #[test]
    fn in_empty_slice_returns_none() {
        assert_eq!(in_slice::seek_non_whitespace_backward(&[], 0), None);
    }

    /// The starting index itself is part of the search.
    #[test]
    fn seeking_from_non_whitespace_returns_that() {
        let haystack = br#"{"seek": 42}"#;

        assert_eq!(in_slice::seek_non_whitespace_backward(haystack, 7), Some((7, b':')));
    }

    /// Newline, tab and space are skipped on the way back to the colon.
    #[test]
    fn seeking_from_whitespace_returns_previous_non_whitespace() {
        let haystack = b"seek: \t\n42}";

        assert_eq!(in_slice::seek_non_whitespace_backward(haystack, 7), Some((4, b':')));
    }
}
/// Tests of `in_slice::find_member`.
mod find_member {
    use super::*;
    use crate::query::JsonString;
    use pretty_assertions::assert_eq;

    /// Document whose member `needle` has its opening quote at index 1.
    const DOCUMENT: &[u8] = br#"{"needle":42,"other":37}"#;
    /// Document in which `"needle"` only appears behind an escaped quote.
    const ESCAPED_DOCUMENT: &[u8] = br#"{"fake\"needle":42,"other":37}"#;

    /// Nothing can be found in an empty input.
    #[test]
    fn in_empty_slice_returns_none() {
        let found = in_slice::find_member(&[], 0, &JsonString::new("abc"));

        assert_eq!(found, None);
    }

    /// Searching from the very start reaches the member at index 1.
    #[test]
    fn starting_from_before_first_occurrence_returns_that() {
        let found = in_slice::find_member(DOCUMENT, 0, &JsonString::new("needle"));

        assert_eq!(found, Some(1));
    }

    /// The starting index itself is part of the search.
    #[test]
    fn starting_from_exactly_first_occurrence_returns_that() {
        let found = in_slice::find_member(DOCUMENT, 1, &JsonString::new("needle"));

        assert_eq!(found, Some(1));
    }

    /// Starting one byte past the opening quote misses the member.
    #[test]
    fn starting_from_after_last_occurrence_returns_none() {
        let found = in_slice::find_member(DOCUMENT, 2, &JsonString::new("needle"));

        assert_eq!(found, None);
    }

    /// A textual hit whose opening quote is escaped does not count.
    #[test]
    fn when_match_is_partial_due_to_escaped_double_quote_returns_none() {
        let found = in_slice::find_member(ESCAPED_DOCUMENT, 0, &JsonString::new("needle"));

        assert_eq!(found, None);
    }

    /// The full member name, escape sequence included, is found at index 1.
    #[test]
    fn when_looking_for_string_with_escaped_double_quote_returns_that() {
        let member = JsonString::new(r#"fake\"needle"#);

        let found = in_slice::find_member(ESCAPED_DOCUMENT, 0, &member);

        assert_eq!(found, Some(1));
    }
}
mod is_member_match {
use super::*;
use crate::query::JsonString;
use pretty_assertions::assert_eq;
#[test]
fn on_exact_match_returns_true() {
let bytes = r#"{"needle":42,"other":37}"#.as_bytes();
let result = in_slice::is_member_match(bytes, 1, 8, &JsonString::new("needle"));
assert_eq!(result, true);
}
#[test]
fn matching_without_double_quotes_returns_false() {
let bytes = r#"{"needle":42,"other":37}"#.as_bytes();
let result = in_slice::is_member_match(bytes, 2, 7, &JsonString::new("needle"));
assert_eq!(result, false);
}
#[test]
fn when_match_is_partial_due_to_escaped_double_quote_returns_false() {
let bytes = r#"{"fake\"needle":42,"other":37}"#.as_bytes();
let result = in_slice::is_member_match(bytes, 7, 14, &JsonString::new("needle"));
assert_eq!(result, false);
}
#[test]
fn when_looking_for_string_with_escaped_double_quote_returns_true() {
let bytes = r#"{"fake\"needle":42,"other":37}"#.as_bytes();
let result = in_slice::is_member_match(bytes, 1, 14, &JsonString::new(r#"fake\"needle"#));
assert_eq!(result, true);
}
}
}