use anyhow::{Context, Result};
use memchr::memchr_iter;
use rayon::prelude::*;
use regex::Regex;
use std::collections::HashSet;
use std::io::{Read, Write};
use crate::filter::{BoolOp, CmpOp, Env, Filter};
use crate::output::{self, OutputConfig};
use crate::simdjson;
/// Per-filter evaluation strategy for NDJSON lines.
///
/// `detect_fast_path` chooses one variant for a filter and `process_line`
/// dispatches on it for every line. All `fields`-like vectors hold a chain of
/// object keys, outermost first.
#[derive(Debug)]
enum NdjsonFastPath {
/// No specialisation: every line is parsed and run through the generic evaluator.
None,
/// The whole filter is a non-empty field chain (as recognised by
/// `crate::filter::collect_field_chain`); the raw value is extracted and emitted.
FieldChain(Vec<String>),
/// `select(<field chain> <op> <literal>)` (either operand order);
/// `literal_bytes` is the literal as produced by `serialize_literal`.
SelectEq {
fields: Vec<String>,
op: CmpOp,
literal_bytes: Vec<u8>,
},
/// Presumably `<field chain> | length`; detection lives outside this view.
Length(Vec<String>),
/// Presumably `<field chain> | keys` / `keys_unsorted` (selected by `sorted`).
Keys { fields: Vec<String>, sorted: bool },
/// Presumably `<field chain> | type`.
Type(Vec<String>),
/// Presumably `<field chain> | has(key)`.
Has { fields: Vec<String>, key: String },
/// Comparison predicate on `pred_fields`, with `out_fields` handed to the
/// line handler; presumably `select(...) | <field chain>`.
SelectEqField {
pred_fields: Vec<String>,
op: CmpOp,
literal_bytes: Vec<u8>,
out_fields: Vec<String>,
},
/// Several field chains paired with byte strings; presumably an object
/// construction whose pre-encoded keys are the first tuple element.
MultiFieldObj {
entries: Vec<(Vec<u8>, Vec<String>)>,
},
/// Several field chains; presumably an array construction.
MultiFieldArr { entries: Vec<Vec<String>> },
/// Comparison predicate combined with `MultiFieldObj`-style entries.
SelectEqObj {
pred_fields: Vec<String>,
op: CmpOp,
literal_bytes: Vec<u8>,
entries: Vec<(Vec<u8>, Vec<String>)>,
},
/// Comparison predicate combined with `MultiFieldArr`-style entries.
SelectEqArr {
pred_fields: Vec<String>,
op: CmpOp,
literal_bytes: Vec<u8>,
entries: Vec<Vec<String>>,
},
/// `select(c1 <bool_op> c2 ...)` where every condition is a
/// field-vs-literal comparison and all connectives are the same `bool_op`.
SelectCompound {
conditions: Vec<(Vec<String>, CmpOp, Vec<u8>)>,
bool_op: BoolOp,
},
/// String predicate (`StringPred`) applied to `fields`; presumably inside `select(...)`.
SelectStringPred {
fields: Vec<String>,
pred: StringPred,
},
/// String predicate on `pred_fields`, with `out_fields` handed to the line handler.
SelectStringPredField {
pred_fields: Vec<String>,
pred: StringPred,
out_fields: Vec<String>,
},
}
/// String predicate carried by the `SelectStringPred*` fast paths.
///
/// NOTE(review): the variant names suggest the `test`, `startswith`,
/// `endswith` and `contains` builtins; the evaluation code is outside this view.
#[derive(Debug)]
enum StringPred {
/// Regular-expression predicate, pre-compiled once.
Test(Regex),
/// Prefix operand.
StartsWith(String),
/// Suffix operand.
EndsWith(String),
/// Substring operand.
Contains(String),
}
/// Target size in bytes (1 MiB) passed to `split_chunks` when a window is
/// divided into work units for parallel processing.
const CHUNK_TARGET_SIZE: usize = 1024 * 1024;
/// Result of processing one chunk: `(output bytes, had_output, error bytes)`.
type ChunkResult = (Vec<u8>, bool, Vec<u8>);
/// Writes buffered error text to stderr.
///
/// Returns `true` when `errors` was non-empty (a write was attempted) and
/// `false` when there was nothing to report. A failing stderr write is
/// deliberately ignored: error reporting is best effort.
fn flush_errors(errors: &[u8]) -> bool {
    if errors.is_empty() {
        return false;
    }
    let _ = std::io::Write::write_all(&mut std::io::stderr(), errors);
    true
}
/// Processes `path` as NDJSON, writing filter output to `out`.
///
/// On Unix the file is memory-mapped unless the `QJ_NO_MMAP` environment
/// variable is set; otherwise (and on other platforms) it is read through the
/// streaming path.
///
/// Returns `Ok(None)` when the delegate decides the file is not NDJSON (and
/// `force_jsonl` is false), otherwise `Ok(Some(had_output))`.
pub fn process_ndjson_file<W: Write>(
    path: &std::path::Path,
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    force_jsonl: bool,
    out: &mut W,
) -> Result<Option<bool>> {
    #[cfg(unix)]
    {
        let mmap_allowed = std::env::var_os("QJ_NO_MMAP").is_none();
        if mmap_allowed {
            return process_ndjson_file_mmap(path, filter, config, env, force_jsonl, out);
        }
    }
    process_ndjson_file_streaming(path, filter, config, env, force_jsonl, out)
}
#[cfg(unix)]
fn process_ndjson_file_mmap<W: Write>(
path: &std::path::Path,
filter: &Filter,
config: &OutputConfig,
env: &Env,
force_jsonl: bool,
out: &mut W,
) -> Result<Option<bool>> {
use std::os::unix::io::AsRawFd;
let file =
std::fs::File::open(path).with_context(|| format!("failed to open file: {path:?}"))?;
let file_len = file.metadata()?.len() as usize;
if file_len == 0 {
return Ok(None);
}
let fd = file.as_raw_fd();
let base_ptr = unsafe {
libc::mmap(
std::ptr::null_mut(),
file_len,
libc::PROT_READ,
libc::MAP_PRIVATE,
fd,
0,
)
};
if base_ptr == libc::MAP_FAILED {
return process_ndjson_file_streaming(path, filter, config, env, force_jsonl, out);
}
unsafe {
libc::madvise(base_ptr, file_len, libc::MADV_SEQUENTIAL);
}
let peek_len = file_len.min(64 * 1024);
let peek_data = unsafe { std::slice::from_raw_parts(base_ptr as *const u8, peek_len) };
if !force_jsonl && !is_ndjson(peek_data) {
unsafe {
libc::munmap(base_ptr, file_len);
}
return Ok(None);
}
let needs_env = if env.is_empty() {
false
} else {
let mut var_refs = HashSet::new();
filter.collect_var_refs(&mut var_refs);
var_refs.iter().any(|v| env.get_var(v).is_some())
};
let use_parallel = !needs_env && filter.is_parallel_safe();
let fast_path = if use_parallel {
detect_fast_path(filter)
} else {
NdjsonFastPath::None
};
let ws = window_size();
let mut had_output = false;
let mut file_offset: usize = 0;
while file_offset < file_len {
let raw_end = (file_offset + ws).min(file_len);
let at_eof = raw_end == file_len;
let window_data = unsafe {
std::slice::from_raw_parts(
(base_ptr as *const u8).add(file_offset),
raw_end - file_offset,
)
};
let process_len = if at_eof {
window_data.len()
} else {
match memchr::memrchr(b'\n', window_data) {
Some(pos) => pos + 1,
None => window_data.len(),
}
};
let to_process = &window_data[..process_len];
if use_parallel {
let chunks = split_chunks(to_process, CHUNK_TARGET_SIZE);
if chunks.len() <= 1 {
let (chunk_out, ho, errs) =
process_chunk(to_process, filter, config, &fast_path, env)?;
out.write_all(&chunk_out)?;
had_output |= ho;
flush_errors(&errs);
} else {
let shared = SharedFilter::new(filter);
let results: Result<Vec<ChunkResult>> = chunks
.par_iter()
.map(|&chunk| {
let empty_env = Env::empty();
process_chunk(chunk, shared.get(), config, &fast_path, &empty_env)
})
.collect();
let results = results?;
for (chunk_out, ho, errs) in results {
out.write_all(&chunk_out)?;
had_output |= ho;
flush_errors(&errs);
}
}
} else {
let (chunk_out, ho, errs) = process_chunk(to_process, filter, config, &fast_path, env)?;
out.write_all(&chunk_out)?;
had_output |= ho;
flush_errors(&errs);
}
file_offset += process_len;
if file_offset < file_len {
let prefetch_end = (file_offset + ws).min(file_len);
unsafe {
libc::madvise(
(base_ptr as *const u8).add(file_offset) as *mut libc::c_void,
prefetch_end - file_offset,
libc::MADV_WILLNEED,
);
}
}
}
unsafe {
libc::munmap(base_ptr, file_len);
}
Ok(Some(had_output))
}
/// Processes `path` as NDJSON using buffered reads instead of a mapping.
///
/// Unless `force_jsonl` is set, the start of the file is sniffed with
/// `detect_ndjson_from_reader`; a negative result yields `Ok(None)` without
/// producing output. Otherwise the file is rewound and streamed through
/// `process_ndjson_streaming`, and `Ok(Some(had_output))` is returned.
fn process_ndjson_file_streaming<W: Write>(
    path: &std::path::Path,
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    force_jsonl: bool,
    out: &mut W,
) -> Result<Option<bool>> {
    use std::io::Seek;

    let mut file =
        std::fs::File::open(path).with_context(|| format!("failed to open file: {path:?}"))?;

    let treat_as_ndjson = force_jsonl
        || detect_ndjson_from_reader(&mut file)
            .with_context(|| format!("failed to read file: {path:?}"))?;
    if !treat_as_ndjson {
        return Ok(None);
    }

    // Detection may have consumed bytes; start over from the beginning.
    file.seek(std::io::SeekFrom::Start(0))
        .with_context(|| format!("failed to seek file: {path:?}"))?;
    let had_output = process_ndjson_streaming(&mut file, filter, config, env, out)
        .with_context(|| format!("failed to process NDJSON: {path:?}"))?;
    Ok(Some(had_output))
}
/// Heuristically decides whether `buf` starts an NDJSON document.
///
/// The answer is `true` only when all of the following hold:
/// * `buf` contains a newline;
/// * ignoring spaces, tabs and carriage returns, the first line starts with
///   `{` or `[` and ends with `}` or `]`;
/// * the first non-whitespace byte after that newline is `{` or `[`.
///
/// A buffer holding a single line (nothing but whitespace after the first
/// newline) is therefore not reported as NDJSON.
pub fn is_ndjson(buf: &[u8]) -> bool {
    let Some(newline_at) = memchr::memchr(b'\n', buf) else {
        return false;
    };
    let (first_line, rest) = (&buf[..newline_at], &buf[newline_at + 1..]);

    let mut significant = first_line
        .iter()
        .copied()
        .filter(|&b| !matches!(b, b' ' | b'\t' | b'\r'));
    let Some(opener) = significant.next() else {
        return false;
    };
    if !matches!(opener, b'{' | b'[') {
        return false;
    }
    // With a single significant byte the opener is also the last byte.
    let closer = significant.next_back().unwrap_or(opener);
    if !matches!(closer, b'}' | b']') {
        return false;
    }

    let next_value = rest
        .iter()
        .copied()
        .find(|&b| !matches!(b, b' ' | b'\t' | b'\r' | b'\n'));
    matches!(next_value, Some(b'{' | b'['))
}
/// Sniffs the beginning of `reader` to decide whether it holds NDJSON.
///
/// Starts with a 64 KiB buffer and doubles it while the data read so far is
/// inconclusive, giving up once a full 1 MiB buffer has been examined or the
/// reader reaches EOF. The reader is left wherever reading stopped; callers
/// that need the data again must rewind.
pub fn detect_ndjson_from_reader<R: Read>(reader: &mut R) -> Result<bool> {
    const INITIAL_PEEK: usize = 64 * 1024;
    const MAX_PEEK: usize = 1024 * 1024;

    let mut peek = vec![0u8; INITIAL_PEEK];
    let mut have = 0usize;
    loop {
        have += read_fully(reader, &mut peek[have..])?;
        if is_ndjson(&peek[..have]) {
            return Ok(true);
        }
        // A short read means EOF: no more data will change the verdict.
        let reached_eof = have < peek.len();
        if reached_eof || peek.len() >= MAX_PEEK {
            return Ok(false);
        }
        let grown = peek.len() * 2;
        peek.resize(grown, 0);
    }
}
/// Splits `buf` into consecutive slices of roughly `target_size` bytes, each
/// ending just after a newline (except possibly the last).
///
/// Every chunk extends from its start to the first newline at or beyond
/// `target_size` bytes in; if no such newline exists the remainder becomes the
/// final chunk. The chunks cover `buf` exactly, in order. An empty buffer
/// yields no chunks.
pub fn split_chunks(buf: &[u8], target_size: usize) -> Vec<&[u8]> {
    let mut pieces = Vec::new();
    let mut remaining = buf;
    while !remaining.is_empty() {
        if target_size >= remaining.len() {
            pieces.push(remaining);
            break;
        }
        let Some(newline_off) = memchr::memchr(b'\n', &remaining[target_size..]) else {
            pieces.push(remaining);
            break;
        };
        let (head, tail) = remaining.split_at(target_size + newline_off + 1);
        pieces.push(head);
        remaining = tail;
    }
    pieces
}
/// Evaluates `filter` over an in-memory NDJSON buffer.
///
/// When the filter references a variable defined in `env`, or is not
/// parallel-safe, the whole buffer is processed sequentially without a fast
/// path. Otherwise the buffer is split into chunks that are evaluated in
/// parallel (with an empty environment) and merged in input order.
///
/// Returns `(output bytes, had_output, error bytes)`.
pub fn process_ndjson(
    data: &[u8],
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
) -> Result<ChunkResult> {
    let needs_env = !env.is_empty() && {
        let mut var_refs = HashSet::new();
        filter.collect_var_refs(&mut var_refs);
        var_refs.iter().any(|v| env.get_var(v).is_some())
    };
    if needs_env || !filter.is_parallel_safe() {
        return process_chunk(data, filter, config, &NdjsonFastPath::None, env);
    }

    let fast_path = detect_fast_path(filter);
    let chunks = split_chunks(data, CHUNK_TARGET_SIZE);
    if chunks.len() <= 1 {
        return process_chunk(data, filter, config, &fast_path, env);
    }

    let shared = SharedFilter::new(filter);
    let per_chunk = chunks
        .par_iter()
        .map(|&chunk| process_chunk(chunk, shared.get(), config, &fast_path, &Env::empty()))
        .collect::<Result<Vec<ChunkResult>>>()?;

    // Merge in chunk order so output matches input order.
    let output_len: usize = per_chunk.iter().map(|part| part.0.len()).sum();
    let mut merged_output = Vec::with_capacity(output_len);
    let mut merged_errors = Vec::new();
    let mut any_output = false;
    for (bytes, emitted, errs) in per_chunk {
        merged_output.extend_from_slice(&bytes);
        merged_errors.extend_from_slice(&errs);
        any_output |= emitted;
    }
    Ok((merged_output, any_output, merged_errors))
}
/// Evaluates `filter` over `data` sequentially with `NdjsonFastPath::None`,
/// i.e. every line goes through the generic evaluator.
///
/// Hidden from the public docs; presumably used to cross-check the fast paths.
#[doc(hidden)]
pub fn process_ndjson_no_fast_path(
data: &[u8],
filter: &Filter,
config: &OutputConfig,
env: &Env,
) -> Result<ChunkResult> {
process_chunk(data, filter, config, &NdjsonFastPath::None, env)
}
/// Evaluates `filter` over an in-memory NDJSON buffer in bounded windows,
/// writing output to `out` as each window completes.
///
/// Each window spans about `window_size()` bytes and always ends on a line
/// boundary: it stops after the last newline inside the window or, when a
/// single record is longer than the window, after that record's newline (or
/// at the end of the data). Within a window, chunks are evaluated in parallel
/// when the filter permits; errors are flushed to stderr per chunk.
///
/// Returns whether any output value was produced.
pub fn process_ndjson_windowed<W: Write>(
    data: &[u8],
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    out: &mut W,
) -> Result<bool> {
    let needs_env = if env.is_empty() {
        false
    } else {
        let mut var_refs = HashSet::new();
        filter.collect_var_refs(&mut var_refs);
        var_refs.iter().any(|v| env.get_var(v).is_some())
    };
    let use_parallel = !needs_env && filter.is_parallel_safe();
    let fast_path = if use_parallel {
        detect_fast_path(filter)
    } else {
        NdjsonFastPath::None
    };
    let window_size = window_size();
    let mut had_output = false;
    let mut offset = 0;
    while offset < data.len() {
        let end = offset.saturating_add(window_size).min(data.len());
        let process_end = if end == data.len() {
            end
        } else {
            match memchr::memrchr(b'\n', &data[offset..end]) {
                Some(pos) => offset + pos + 1,
                // No line boundary inside the window: extend to the end of the
                // oversized record rather than splitting it. This also
                // guarantees forward progress for any window size.
                None => memchr::memchr(b'\n', &data[end..])
                    .map_or(data.len(), |pos| end + pos + 1),
            }
        };
        let window_data = &data[offset..process_end];
        if use_parallel {
            let chunks = split_chunks(window_data, CHUNK_TARGET_SIZE);
            if chunks.len() <= 1 {
                let (chunk_out, ho, errs) =
                    process_chunk(window_data, filter, config, &fast_path, env)?;
                out.write_all(&chunk_out)?;
                had_output |= ho;
                flush_errors(&errs);
            } else {
                let shared = SharedFilter::new(filter);
                let results: Result<Vec<ChunkResult>> = chunks
                    .par_iter()
                    .map(|&chunk| {
                        let empty_env = Env::empty();
                        process_chunk(chunk, shared.get(), config, &fast_path, &empty_env)
                    })
                    .collect();
                for (chunk_out, ho, errs) in results? {
                    out.write_all(&chunk_out)?;
                    had_output |= ho;
                    flush_errors(&errs);
                }
            }
        } else {
            let (chunk_out, ho, errs) =
                process_chunk(window_data, filter, config, &fast_path, env)?;
            out.write_all(&chunk_out)?;
            had_output |= ho;
            flush_errors(&errs);
        }
        offset = process_end;
    }
    Ok(had_output)
}
/// Lower bound (32 MiB) for the automatically chosen window size.
const MIN_WINDOW_SIZE: usize = 32 * 1024 * 1024;
/// Upper bound (128 MiB) for the automatically chosen window size.
const MAX_WINDOW_SIZE: usize = 128 * 1024 * 1024;
/// Returns the processing window size in bytes.
///
/// `QJ_WINDOW_SIZE` (a positive whole number of MiB) overrides the default.
/// Values that are unparsable, zero, or too large to express in bytes are
/// ignored, because a zero-sized or overflowing window would stall or break
/// the windowed readers. The default is 32 chunks per rayon thread, clamped to
/// `MIN_WINDOW_SIZE..=MAX_WINDOW_SIZE`.
fn window_size() -> usize {
    const BYTES_PER_MIB: usize = 1024 * 1024;

    let requested = std::env::var_os("QJ_WINDOW_SIZE")
        .and_then(|raw| raw.to_str().and_then(|s| s.parse::<usize>().ok()))
        .filter(|&mib| mib > 0)
        .and_then(|mib| mib.checked_mul(BYTES_PER_MIB))
        // Keep `offset + window` from overflowing in callers.
        .filter(|&bytes| bytes <= isize::MAX as usize);
    if let Some(bytes) = requested {
        return bytes;
    }

    let num_threads = rayon::current_num_threads();
    (num_threads * CHUNK_TARGET_SIZE * 32).clamp(MIN_WINDOW_SIZE, MAX_WINDOW_SIZE)
}
/// Evaluates `filter` over NDJSON read incrementally from `reader`, writing
/// output to `out` window by window.
///
/// A buffer of `window_size()` bytes is filled repeatedly. Each pass processes
/// everything up to the last newline in the buffer and carries the trailing
/// partial line over to the front of the buffer for the next pass. If a full
/// buffer contains no newline at all, the buffer is doubled and reading
/// continues so that an oversized record is never split.
///
/// Returns whether any output value was produced.
pub fn process_ndjson_streaming<R: Read, W: Write>(
reader: &mut R,
filter: &Filter,
config: &OutputConfig,
env: &Env,
out: &mut W,
) -> Result<bool> {
// The environment only matters if the filter references a variable it defines.
let needs_env = if env.is_empty() {
false
} else {
let mut var_refs = HashSet::new();
filter.collect_var_refs(&mut var_refs);
var_refs.iter().any(|v| env.get_var(v).is_some())
};
let use_parallel = !needs_env && filter.is_parallel_safe();
let fast_path = if use_parallel {
detect_fast_path(filter)
} else {
NdjsonFastPath::None
};
let window_size = window_size();
let mut buf = vec![0u8; window_size];
// Number of bytes at the front of `buf` left over from the previous pass.
let mut carry_len: usize = 0;
let mut had_output = false;
loop {
let max_read = buf.len() - carry_len;
let bytes_read = read_fully(reader, &mut buf[carry_len..carry_len + max_read])?;
if bytes_read == 0 && carry_len == 0 {
break; }
let data_len = carry_len + bytes_read;
// `read_fully` only returns short at end of input.
let at_eof = bytes_read < max_read;
let (process_len, next_carry_len) = if at_eof {
(data_len, 0)
} else {
match memchr::memrchr(b'\n', &buf[..data_len]) {
Some(pos) => (pos + 1, data_len - (pos + 1)),
None => {
// One record fills the whole buffer: keep it all and grow.
carry_len = data_len;
buf.resize(buf.len() * 2, 0);
continue;
}
}
};
let window_data = &buf[..process_len];
if use_parallel {
let chunks = split_chunks(window_data, CHUNK_TARGET_SIZE);
if chunks.len() <= 1 {
let (chunk_out, ho, errs) =
process_chunk(window_data, filter, config, &fast_path, env)?;
out.write_all(&chunk_out)?;
flush_errors(&errs);
had_output |= ho;
} else {
let shared = SharedFilter::new(filter);
let results: Result<Vec<ChunkResult>> = chunks
.par_iter()
.map(|&chunk| {
let empty_env = Env::empty();
process_chunk(chunk, shared.get(), config, &fast_path, &empty_env)
})
.collect();
let results = results?;
// Results are in chunk order, so output order matches input order.
for (chunk_out, ho, errs) in results {
out.write_all(&chunk_out)?;
had_output |= ho;
flush_errors(&errs);
}
}
} else {
let (chunk_out, ho, errs) =
process_chunk(window_data, filter, config, &fast_path, env)?;
out.write_all(&chunk_out)?;
flush_errors(&errs);
had_output |= ho;
}
if at_eof {
break;
}
// Move the unprocessed tail (a partial line) to the front of the buffer.
if next_carry_len > 0 {
buf.copy_within(process_len..process_len + next_carry_len, 0);
}
carry_len = next_carry_len;
}
Ok(had_output)
}
/// Reads from `reader` until `buf` is full or the reader reports end of input.
///
/// Interrupted reads are retried; any other I/O error is returned. The result
/// is the number of bytes placed in `buf`, which is less than `buf.len()` only
/// at end of input.
fn read_fully<R: Read>(reader: &mut R, buf: &mut [u8]) -> Result<usize> {
    let mut filled = 0;
    loop {
        let dest = &mut buf[filled..];
        if dest.is_empty() {
            return Ok(filled);
        }
        match reader.read(dest) {
            Ok(0) => return Ok(filled),
            Ok(n) => filled += n,
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e.into()),
        }
    }
}
/// Chooses the specialised evaluation strategy for `filter`, if any.
///
/// Setting the `QJ_NO_FAST_PATH` environment variable disables all fast paths.
/// Otherwise a pure field chain wins first, followed by the detectors in this
/// order: select+extract, select, select with string predicate, multi-field,
/// length/keys. The first detector that recognises the filter decides.
fn detect_fast_path(filter: &Filter) -> NdjsonFastPath {
    if std::env::var_os("QJ_NO_FAST_PATH").is_some() {
        return NdjsonFastPath::None;
    }

    let mut chain = Vec::new();
    let is_chain = crate::filter::collect_field_chain(filter, &mut chain);
    if is_chain && !chain.is_empty() {
        return NdjsonFastPath::FieldChain(chain);
    }

    detect_select_extract_fast_path(filter)
        .or_else(|| detect_select_fast_path(filter))
        .or_else(|| detect_select_string_pred_fast_path(filter))
        .or_else(|| detect_multi_field_fast_path(filter))
        .or_else(|| detect_length_keys_fast_path(filter))
        .unwrap_or(NdjsonFastPath::None)
}
/// Wrapper that lets a borrowed `Filter` be used from rayon worker threads.
///
/// The filter is held as a raw pointer so the wrapper can be marked
/// `Send`/`Sync` by hand. The lifetime parameter ties the wrapper (and every
/// reference handed out by [`SharedFilter::get`]) to the original borrow, so
/// the pointer can never dangle.
struct SharedFilter<'a> {
    ptr: *const Filter,
    _borrow: std::marker::PhantomData<&'a Filter>,
}
// SAFETY: the wrapper only hands out shared references to the filter. The
// callers in this file create it only after `Filter::is_parallel_safe()`
// returned true. NOTE(review): soundness relies on that check guaranteeing
// that concurrent read-only evaluation of the filter is safe — confirm against
// `Filter`'s definition.
unsafe impl Send for SharedFilter<'_> {}
// SAFETY: see the `Send` impl above.
unsafe impl Sync for SharedFilter<'_> {}
impl<'a> SharedFilter<'a> {
    /// Wraps `filter` for sharing across threads for the duration of the borrow.
    fn new(filter: &'a Filter) -> Self {
        Self {
            ptr: filter as *const Filter,
            _borrow: std::marker::PhantomData,
        }
    }
    /// Returns the wrapped filter.
    fn get(&self) -> &'a Filter {
        // SAFETY: `ptr` was created from a `&'a Filter` in `new`, so it is
        // non-null, aligned and valid for reads for all of `'a`.
        unsafe { &*self.ptr }
    }
}
/// Evaluates `filter` over every line of `chunk`.
///
/// For the `SelectEq` and `SelectCompound` fast paths a fused whole-chunk
/// routine is tried first. If it declines (returns `None`), anything it wrote
/// is discarded and the chunk is processed line by line via `process_line`.
///
/// Returns `(output bytes, had_output, error bytes)`. The error buffer is
/// always empty when a fused routine handled the chunk.
fn process_chunk(
chunk: &[u8],
filter: &Filter,
config: &OutputConfig,
fast_path: &NdjsonFastPath,
env: &Env,
) -> Result<ChunkResult> {
let mut output_buf = Vec::with_capacity(chunk.len() / 2);
let mut had_output = false;
match fast_path {
NdjsonFastPath::SelectEq {
fields,
op,
literal_bytes,
} => {
if let Some(result) = process_chunk_select_eq_fused(
chunk,
fields,
*op,
literal_bytes,
config,
&mut output_buf,
&mut had_output,
) {
result?;
let error_buf = Vec::new();
return Ok((output_buf, had_output, error_buf));
}
// The fused routine gave up: drop any partial output and start over.
output_buf.clear();
had_output = false;
}
NdjsonFastPath::SelectCompound {
conditions,
bool_op,
} => {
if let Some(result) = process_chunk_select_compound_fused(
chunk,
conditions,
*bool_op,
config,
&mut output_buf,
&mut had_output,
) {
result?;
let error_buf = Vec::new();
return Ok((output_buf, had_output, error_buf));
}
// The fused routine gave up: drop any partial output and start over.
output_buf.clear();
had_output = false;
}
_ => {}
}
// Scratch buffer reused across lines for the padded copy of each line.
let mut scratch = Vec::new();
// A DOM parser is only created when a fast path is active.
let mut dom_parser = if matches!(fast_path, NdjsonFastPath::None) {
None
} else {
Some(simdjson::DomParser::new()?)
};
let mut error_buf = Vec::new();
let mut start = 0;
for nl_pos in memchr_iter(b'\n', chunk) {
let line = &chunk[start..nl_pos];
start = nl_pos + 1;
process_line(
line,
filter,
config,
fast_path,
env,
&mut output_buf,
&mut had_output,
&mut error_buf,
&mut scratch,
&mut dom_parser,
)?;
}
// Final line without a trailing newline.
if start < chunk.len() {
process_line(
&chunk[start..],
filter,
config,
fast_path,
env,
&mut output_buf,
&mut had_output,
&mut error_buf,
&mut scratch,
&mut dom_parser,
)?;
}
Ok((output_buf, had_output, error_buf))
}
/// Decodes the escape sequences of a JSON string body and appends the result
/// to `out`.
///
/// `data` is the text between the quotes. Handled escapes are `\"`, `\\`,
/// `\/`, `\n`, `\t`, `\r`, `\b`, `\f` and `\uXXXX`, including UTF-16 surrogate
/// pairs. A surrogate escape that is not part of a valid high/low pair encodes
/// no character and is dropped. An unrecognised or truncated escape is copied
/// through unchanged.
fn unescape_json_string(data: &[u8], out: &mut Vec<u8>) {
    /// Parses exactly four ASCII hex digits.
    fn hex4(digits: &[u8]) -> Option<u16> {
        // `from_str_radix` alone would also accept a leading `+`.
        if digits.len() != 4 || !digits.iter().all(u8::is_ascii_hexdigit) {
            return None;
        }
        let text = std::str::from_utf8(digits).ok()?;
        u16::from_str_radix(text, 16).ok()
    }

    /// Appends `c` as UTF-8.
    fn push_char(out: &mut Vec<u8>, c: char) {
        let mut utf8 = [0u8; 4];
        out.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
    }

    let mut i = 0;
    while i < data.len() {
        if data[i] != b'\\' || i + 1 >= data.len() {
            out.push(data[i]);
            i += 1;
            continue;
        }

        let simple = match data[i + 1] {
            b'"' => Some(b'"'),
            b'\\' => Some(b'\\'),
            b'/' => Some(b'/'),
            b'n' => Some(b'\n'),
            b't' => Some(b'\t'),
            b'r' => Some(b'\r'),
            b'b' => Some(0x08),
            b'f' => Some(0x0C),
            _ => None,
        };
        if let Some(byte) = simple {
            out.push(byte);
            i += 2;
            continue;
        }

        if data[i + 1] == b'u' && i + 5 < data.len() {
            if let Some(cp) = hex4(&data[i + 2..i + 6]) {
                let is_high_surrogate = (0xD800..=0xDBFF).contains(&cp);
                if is_high_surrogate
                    && i + 11 < data.len()
                    && data[i + 6] == b'\\'
                    && data[i + 7] == b'u'
                {
                    // Only a genuine low surrogate completes the pair; anything
                    // else is left to be decoded as its own escape.
                    let low = hex4(&data[i + 8..i + 12])
                        .filter(|lo| (0xDC00..=0xDFFF).contains(lo));
                    if let Some(lo) = low {
                        let full =
                            0x10000 + ((cp as u32 - 0xD800) << 10) + (lo as u32 - 0xDC00);
                        if let Some(c) = char::from_u32(full) {
                            push_char(out, c);
                            i += 12;
                            continue;
                        }
                    }
                }
                // Lone surrogates are not valid scalar values and emit nothing.
                if let Some(c) = char::from_u32(cp as u32) {
                    push_char(out, c);
                }
                i += 6;
                continue;
            }
        }

        // Unknown or malformed escape: keep the backslash and carry on.
        out.push(data[i]);
        i += 1;
    }
}
fn drain_eval_errors(error_buf: &mut Vec<u8>) {
if let Some(err) = crate::filter::eval::take_last_error() {
let msg = match &err {
crate::value::Value::String(s) => s.as_str().to_owned(),
other => other.short_desc(),
};
let _ = writeln!(error_buf, "qj: error: {msg}");
}
}
/// Appends the configured separator after an output value: a NUL byte when
/// `null_separator` is set, nothing when `join_output` is set, and a newline
/// otherwise. `null_separator` takes precedence over `join_output`.
#[inline]
fn write_line_terminator(output_buf: &mut Vec<u8>, config: &OutputConfig) {
    let terminator = match (config.null_separator, config.join_output) {
        (true, _) => Some(0u8),
        (false, false) => Some(b'\n'),
        (false, true) => None,
    };
    output_buf.extend(terminator);
}
/// Appends an extracted raw JSON value to `output_buf`.
///
/// In raw output mode a value that is a quoted string is written unquoted and
/// unescaped; every other value (and every value in other modes) is copied
/// byte for byte.
#[inline]
fn emit_raw_field(output_buf: &mut Vec<u8>, raw: &[u8], config: &OutputConfig) {
    let raw_mode = config.mode == output::OutputMode::Raw;
    match raw {
        [b'"', inner @ .., b'"'] if raw_mode => unescape_json_string(inner, output_buf),
        _ => output_buf.extend_from_slice(raw),
    }
}
/// Encodes `key` as a quoted JSON string.
///
/// `"` and `\` are backslash-escaped and control bytes below 0x20 are written
/// as `\u00XX`; all other bytes are copied unchanged.
fn json_key_bytes(key: &str) -> Vec<u8> {
    let mut quoted = Vec::with_capacity(key.len() + 2);
    quoted.push(b'"');
    for byte in key.bytes() {
        if byte == b'"' || byte == b'\\' {
            quoted.push(b'\\');
            quoted.push(byte);
        } else if byte < 0x20 {
            // Writing into a Vec cannot fail.
            let _ = write!(quoted, "\\u{:04x}", byte);
        } else {
            quoted.push(byte);
        }
    }
    quoted.push(b'"');
    quoted
}
/// Copies `trimmed` into `scratch` followed by `simdjson::padding()` zero
/// bytes and returns that padded region.
///
/// `scratch` is grown when too small and never shrunk, so it can be reused
/// across lines without reallocating.
fn prepare_padded<'a>(trimmed: &[u8], scratch: &'a mut Vec<u8>) -> &'a [u8] {
    let payload_len = trimmed.len();
    let total = payload_len + simdjson::padding();
    if scratch.len() < total {
        scratch.resize(total, 0);
    }
    let (payload, tail) = scratch[..total].split_at_mut(payload_len);
    payload.copy_from_slice(trimmed);
    // The buffer may hold bytes from a previous, longer line.
    tail.fill(0);
    &scratch[..total]
}
/// Evaluates `filter` on a single NDJSON line and appends the result to
/// `output_buf`.
///
/// Leading and trailing spaces, tabs and carriage returns are stripped; blank
/// lines are skipped. Lines that do not start with `{` always use the generic
/// evaluator, whatever fast path was requested. Otherwise the work is handed
/// to the handler for the active fast path. `had_output` is set when a value
/// is emitted, and any pending evaluator error is appended to `error_buf`
/// after the line has been handled.
///
/// `dom_parser` must be `Some` whenever `fast_path` is not
/// `NdjsonFastPath::None` (as arranged by `process_chunk`); the fast-path
/// branches unwrap it.
#[allow(clippy::too_many_arguments)]
fn process_line(
line: &[u8],
filter: &Filter,
config: &OutputConfig,
fast_path: &NdjsonFastPath,
env: &Env,
output_buf: &mut Vec<u8>,
had_output: &mut bool,
error_buf: &mut Vec<u8>,
scratch: &mut Vec<u8>,
dom_parser: &mut Option<simdjson::DomParser>,
) -> Result<()> {
// Trim trailing, then leading, whitespace (newlines are already split off).
let end = line
.iter()
.rposition(|&b| !matches!(b, b' ' | b'\t' | b'\r'))
.map_or(0, |p| p + 1);
let start = line[..end]
.iter()
.position(|&b| !matches!(b, b' ' | b'\t' | b'\r'))
.unwrap_or(end);
let trimmed = &line[start..end];
if trimmed.is_empty() {
return Ok(());
}
// Fast paths are only applied to lines that start with `{`.
let none_path = NdjsonFastPath::None;
let fast_path = if !matches!(fast_path, NdjsonFastPath::None) && trimmed[0] != b'{' {
&none_path
} else {
fast_path
};
match fast_path {
NdjsonFastPath::FieldChain(fields) => {
let padded = prepare_padded(trimmed, scratch);
let field_refs: Vec<&str> = fields.iter().map(|s| s.as_str()).collect();
let dp = dom_parser.as_mut().unwrap();
let raw = dp
.find_field_raw(padded, trimmed.len(), &field_refs)
.context("failed to extract field from NDJSON line")?;
*had_output = true;
emit_raw_field(output_buf, &raw, config);
write_line_terminator(output_buf, config);
}
NdjsonFastPath::SelectEq {
fields,
op,
literal_bytes,
} => {
// Try the byte-level check first; fall back to the parser-backed handler
// when it cannot decide.
if let Some(result) = process_line_select_eq_raw(
trimmed,
fields,
*op,
literal_bytes,
config,
output_buf,
had_output,
) {
result?;
} else {
process_line_select_eq(
trimmed,
fields,
*op,
literal_bytes,
filter,
config,
env,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
}
NdjsonFastPath::SelectCompound {
conditions,
bool_op,
} => {
if let Some(result) = process_line_select_compound_raw(
trimmed, conditions, *bool_op, config, output_buf, had_output,
) {
result?;
} else {
// Byte-level check was inconclusive: evaluate the filter generically.
let padded = prepare_padded(trimmed, scratch);
let flat_buf = simdjson::dom_parse_to_flat_buf(padded, trimmed.len())
.context("failed to parse NDJSON line")?;
crate::flat_eval::eval_flat(filter, flat_buf.root(), env, &mut |v| {
*had_output = true;
output::write_value(output_buf, &v, config).ok();
});
}
}
NdjsonFastPath::Length(fields) => {
process_line_length(
trimmed,
fields,
filter,
config,
env,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::Keys { fields, sorted } => {
process_line_keys(
trimmed,
fields,
*sorted,
filter,
config,
env,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::Type(fields) => {
process_line_type(
trimmed,
fields,
filter,
config,
env,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::Has { fields, key } => {
process_line_has(
trimmed,
fields,
key,
filter,
config,
env,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::SelectEqField {
pred_fields,
op,
literal_bytes,
out_fields,
} => {
process_line_select_eq_field(
trimmed,
pred_fields,
*op,
literal_bytes,
out_fields,
filter,
config,
env,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::MultiFieldObj { entries } => {
process_line_multi_field_obj(
trimmed,
entries,
config,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::MultiFieldArr { entries } => {
process_line_multi_field_arr(
trimmed,
entries,
config,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::SelectEqObj {
pred_fields,
op,
literal_bytes,
entries,
} => {
process_line_select_eq_obj(
trimmed,
pred_fields,
*op,
literal_bytes,
entries,
filter,
config,
env,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::SelectEqArr {
pred_fields,
op,
literal_bytes,
entries,
} => {
process_line_select_eq_arr(
trimmed,
pred_fields,
*op,
literal_bytes,
entries,
filter,
config,
env,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::SelectStringPred { fields, pred } => {
process_line_select_string_pred(
trimmed,
fields,
pred,
filter,
config,
env,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::SelectStringPredField {
pred_fields,
pred,
out_fields,
} => {
process_line_select_string_pred_field(
trimmed,
pred_fields,
pred,
out_fields,
filter,
config,
env,
output_buf,
had_output,
scratch,
dom_parser.as_mut().unwrap(),
)?;
}
NdjsonFastPath::None => {
// Generic path: parse the whole line and run the filter on it.
// NOTE(review): write errors from `output::write_value` are discarded here.
let padded = prepare_padded(trimmed, scratch);
let flat_buf = simdjson::dom_parse_to_flat_buf(padded, trimmed.len())
.context("failed to parse NDJSON line")?;
crate::flat_eval::eval_flat(filter, flat_buf.root(), env, &mut |v| {
*had_output = true;
output::write_value(output_buf, &v, config).ok();
});
}
}
drain_eval_errors(error_buf);
Ok(())
}
/// Recognises `select(...)` filters whose predicate can be checked on raw bytes.
///
/// * `select(<field chain> <op> <literal>)`, in either operand order, becomes
///   `NdjsonFastPath::SelectEq`.
/// * `select(<cmp> and/or <cmp> ...)`, where every leaf is such a comparison
///   and all connectives are the same operator, becomes
///   `NdjsonFastPath::SelectCompound`.
///
/// Anything else yields `None`.
fn detect_select_fast_path(filter: &Filter) -> Option<NdjsonFastPath> {
    let Filter::Select(inner) = filter else {
        return None;
    };
    match inner.as_ref() {
        Filter::Compare(lhs, op, rhs) => {
            let (fields, literal_bytes) =
                try_field_literal(lhs, rhs).or_else(|| try_field_literal(rhs, lhs))?;
            Some(NdjsonFastPath::SelectEq {
                fields,
                op: *op,
                literal_bytes,
            })
        }
        Filter::BoolOp(..) => {
            let mut conditions = Vec::new();
            let mut bool_op = None;
            if !collect_compound_conditions(inner, &mut conditions, &mut bool_op) {
                return None;
            }
            bool_op.map(|op| NdjsonFastPath::SelectCompound {
                conditions,
                bool_op: op,
            })
        }
        _ => None,
    }
}
/// Flattens a tree of boolean operators over field/literal comparisons.
///
/// Leaves are appended to `conditions` in left-to-right order. `bool_op`
/// records the single connective used; the function returns `false` as soon
/// as a different connective, a non-comparison node, or a comparison that is
/// not field-vs-literal is met. On `false`, `conditions` may hold a partial
/// result and must be ignored.
fn collect_compound_conditions(
    filter: &Filter,
    conditions: &mut Vec<(Vec<String>, CmpOp, Vec<u8>)>,
    bool_op: &mut Option<BoolOp>,
) -> bool {
    match filter {
        Filter::BoolOp(lhs, op, rhs) => {
            // The first connective seen fixes the operator for the whole tree.
            let established = *bool_op.get_or_insert(*op);
            if established != *op {
                return false;
            }
            collect_compound_conditions(lhs, conditions, bool_op)
                && collect_compound_conditions(rhs, conditions, bool_op)
        }
        Filter::Compare(lhs, op, rhs) => {
            match try_field_literal(lhs, rhs).or_else(|| try_field_literal(rhs, lhs)) {
                Some((fields, literal_bytes)) => {
                    conditions.push((fields, *op, literal_bytes));
                    true
                }
                None => false,
            }
        }
        _ => false,
    }
}
/// Interprets `field_side` as a non-empty field chain and `lit_side` as a
/// scalar literal.
///
/// Returns the field names and the literal's serialised bytes, or `None` when
/// either side has a different shape.
fn try_field_literal(field_side: &Filter, lit_side: &Filter) -> Option<(Vec<String>, Vec<u8>)> {
    let mut fields = Vec::new();
    let is_chain = crate::filter::collect_field_chain(field_side, &mut fields);
    if !is_chain || fields.is_empty() {
        return None;
    }
    serialize_literal(lit_side).map(|literal_bytes| (fields, literal_bytes))
}
/// Serialises a scalar literal filter to the JSON bytes used for raw
/// comparison against field values.
///
/// Strings are quoted with `"`, `\`, `\n`, `\r`, `\t` escaped by name and
/// other control bytes as `\u00XX`. Integral finite doubles are written
/// without a fractional part. Returns `None` for anything that is not a
/// string, number, boolean or null literal, and for integral doubles outside
/// the `i64` range, which cannot be written as an exact integer here; the
/// caller then simply uses no fast path.
fn serialize_literal(filter: &Filter) -> Option<Vec<u8>> {
    use crate::value::Value;

    /// 2^63: the first magnitude an `i64` cannot represent.
    const I64_LIMIT: f64 = 9_223_372_036_854_775_808.0;

    match filter {
        Filter::Literal(Value::String(s)) => {
            let mut buf = Vec::with_capacity(s.len() + 2);
            buf.push(b'"');
            for &b in s.as_bytes() {
                match b {
                    b'"' => buf.extend_from_slice(b"\\\""),
                    b'\\' => buf.extend_from_slice(b"\\\\"),
                    b'\n' => buf.extend_from_slice(b"\\n"),
                    b'\r' => buf.extend_from_slice(b"\\r"),
                    b'\t' => buf.extend_from_slice(b"\\t"),
                    b if b < 0x20 => {
                        buf.extend_from_slice(format!("\\u{:04x}", b).as_bytes());
                    }
                    _ => buf.push(b),
                }
            }
            buf.push(b'"');
            Some(buf)
        }
        Filter::Literal(Value::Int(n)) => Some(n.to_string().into_bytes()),
        Filter::Literal(Value::Double(f, _)) => {
            let text = if f.is_finite() && f.fract() == 0.0 {
                // `as i64` saturates outside the i64 range, which would
                // silently turn e.g. 1e300 into i64::MAX.
                if *f < -I64_LIMIT || *f >= I64_LIMIT {
                    return None;
                }
                (*f as i64).to_string()
            } else {
                f.to_string()
            };
            Some(text.into_bytes())
        }
        Filter::Literal(Value::Bool(b)) => Some(if *b {
            b"true".to_vec()
        } else {
            b"false".to_vec()
        }),
        Filter::Literal(Value::Null) => Some(b"null".to_vec()),
        _ => None,
    }
}
/// Classifies a raw JSON value by its first byte.
///
/// Returns `b'"'` for strings, `b'b'` for booleans, `b'n'` for null, `b'{'`
/// for objects, `b'['` for arrays, `b'0'` for numbers and `b'?'` for empty or
/// unrecognised input.
fn json_type_tag(bytes: &[u8]) -> u8 {
    let Some(&lead) = bytes.first() else {
        return b'?';
    };
    match lead {
        b'"' | b'n' | b'{' | b'[' => lead,
        b't' | b'f' => b'b',
        b'-' | b'0'..=b'9' => b'0',
        _ => b'?',
    }
}
/// Decides whether two raw JSON values that differ byte-wise are certainly
/// unequal as JSON values.
///
/// Call this only when `raw != literal_bytes`. The answer is `true` when:
/// * the values have different types;
/// * both are null or boolean;
/// * both are strings without any escape sequence;
/// * both are numbers written as plain integers (no fraction or exponent),
///   excluding `-0`, which equals `0` despite the different spelling.
///
/// Otherwise different spellings may denote the same value (`1.0` vs `1`,
/// `"\u0041"` vs `"A"`) and `false` is returned so the caller can fall back to
/// a full comparison.
fn bytes_mismatch_is_definitive(raw: &[u8], literal_bytes: &[u8]) -> bool {
    /// True when a numeric token is an integer with a unique spelling.
    fn is_canonical_int(token: &[u8]) -> bool {
        token != b"-0" && !token.iter().any(|b| matches!(b, b'.' | b'e' | b'E'))
    }

    let raw_type = json_type_tag(raw);
    let lit_type = json_type_tag(literal_bytes);
    if raw_type != lit_type {
        return true;
    }
    match raw_type {
        b'n' | b'b' => true,
        b'"' => {
            let raw_inner = &raw[1..raw.len().saturating_sub(1)];
            let lit_inner = &literal_bytes[1..literal_bytes.len().saturating_sub(1)];
            !raw_inner.contains(&b'\\') && !lit_inner.contains(&b'\\')
        }
        b'0' => is_canonical_int(raw) && is_canonical_int(literal_bytes),
        _ => false,
    }
}
/// Evaluates `raw <op> literal_bytes` on raw JSON bytes where that can be done
/// without full parsing.
///
/// Returns `Some(result)` when the outcome is certain and `None` when the
/// caller must fall back to a real comparison.
///
/// * `==` / `!=`: identical bytes are equal; differing bytes are unequal only
///   if `bytes_mismatch_is_definitive` says so.
/// * Ordering operators: two numbers are compared as `f64`; two strings
///   without escapes are compared byte-wise; every other combination is
///   undecided.
fn evaluate_select_predicate(raw: &[u8], literal_bytes: &[u8], op: CmpOp) -> Option<bool> {
    if matches!(op, CmpOp::Eq | CmpOp::Ne) {
        let wants_equal = matches!(op, CmpOp::Eq);
        return if raw == literal_bytes {
            Some(wants_equal)
        } else if bytes_mismatch_is_definitive(raw, literal_bytes) {
            Some(!wants_equal)
        } else {
            None
        };
    }

    let ordering = match (json_type_tag(raw), json_type_tag(literal_bytes)) {
        (b'0', b'0') => {
            let lhs = parse_json_number(raw)?;
            let rhs = parse_json_number(literal_bytes)?;
            lhs.partial_cmp(&rhs)?
        }
        (b'"', b'"') => {
            let raw_inner = &raw[1..raw.len().saturating_sub(1)];
            let lit_inner = &literal_bytes[1..literal_bytes.len().saturating_sub(1)];
            // Escapes would need decoding before the bytes can be compared.
            if raw_inner.contains(&b'\\') || lit_inner.contains(&b'\\') {
                return None;
            }
            raw_inner.cmp(lit_inner)
        }
        _ => return None,
    };

    Some(match op {
        CmpOp::Lt => ordering.is_lt(),
        CmpOp::Le => ordering.is_le(),
        CmpOp::Gt => ordering.is_gt(),
        CmpOp::Ge => ordering.is_ge(),
        _ => unreachable!(),
    })
}
/// Parses raw bytes as an `f64`, returning `None` when the bytes are not
/// valid UTF-8 or not accepted by Rust's `f64` parser.
fn parse_json_number(bytes: &[u8]) -> Option<f64> {
    let text = std::str::from_utf8(bytes).ok()?;
    text.parse().ok()
}
/// Builds the byte pattern `"<field>":` used to search a line for a key.
///
/// The field name is inserted verbatim (no JSON escaping), so only keys whose
/// on-the-wire spelling equals `field` can match.
fn build_field_pattern(field: &str) -> Vec<u8> {
    [&b"\""[..], field.as_bytes(), &b"\":"[..]].concat()
}
/// Locates a key with a pre-built substring finder and returns its raw value.
///
/// Only the first occurrence of the pattern is considered, and it is accepted
/// only when the byte immediately before it is `{` or `,` (compact JSON) and
/// a value follows. Returns `None` otherwise so callers can fall back to a
/// slower scan.
///
/// NOTE(review): nesting depth is not tracked here, so a same-named key in a
/// nested object that appears earlier in the line would be returned — confirm
/// this trade-off is intended.
fn find_field_value_with_finder<'a>(
    line: &'a [u8],
    finder: &memchr::memmem::Finder<'_>,
    pattern_len: usize,
) -> Option<&'a [u8]> {
    let pos = finder.find(line)?;
    match pos.checked_sub(1).map(|before| line[before]) {
        Some(b'{' | b',') => {}
        _ => return None,
    }
    let value_start = pos + pattern_len;
    if value_start >= line.len() {
        return None;
    }
    extract_json_value(line, value_start)
}
/// Substring-search lookup of `field` in `line`: builds the `"field":`
/// pattern and delegates to `find_field_value_with_finder`.
fn find_field_value_memmem<'a>(line: &'a [u8], field: &str) -> Option<&'a [u8]> {
    let pattern = build_field_pattern(field);
    find_field_value_with_finder(line, &memchr::memmem::Finder::new(&pattern), pattern.len())
}
/// Finds the value of `field` among the keys at nesting depth 1 of `line` by
/// scanning byte by byte.
///
/// The scan tracks string literals (including backslash escapes) and bracket
/// depth, tolerates whitespace around the colon, and compares the key bytes
/// verbatim with `field`. On the first matching key followed by `:` and a
/// value, the result of `extract_json_value` is returned, even if that is
/// `None`. Returns `None` when no such key exists.
fn find_field_value_scan<'a>(line: &'a [u8], field: &str) -> Option<&'a [u8]> {
    let skip_ws = |mut at: usize| {
        while at < line.len() && matches!(line[at], b' ' | b'\t' | b'\r' | b'\n') {
            at += 1;
        }
        at
    };

    let key = field.as_bytes();
    let mut depth: u32 = 0;
    let mut inside_string = false;
    let mut pos = 0;
    while pos < line.len() {
        let byte = line[pos];
        if inside_string {
            match byte {
                // Skip the escaped byte as well.
                b'\\' => {
                    pos += 2;
                    continue;
                }
                b'"' => inside_string = false,
                _ => {}
            }
            pos += 1;
            continue;
        }
        match byte {
            b'"' => {
                if depth == 1 {
                    let key_start = pos + 1;
                    let key_end = key_start + key.len();
                    let key_matches =
                        line.get(key_end) == Some(&b'"') && line[key_start..key_end] == *key;
                    if key_matches {
                        let colon_at = skip_ws(key_end + 1);
                        if line.get(colon_at) == Some(&b':') {
                            let value_at = skip_ws(colon_at + 1);
                            if value_at < line.len() {
                                return extract_json_value(line, value_at);
                            }
                        }
                    }
                }
                // Not the key we want: consume it as an ordinary string.
                inside_string = true;
            }
            b'{' | b'[' => depth += 1,
            b'}' | b']' => depth = depth.saturating_sub(1),
            _ => {}
        }
        pos += 1;
    }
    None
}
/// Looks up `field` in `line`, trying the substring search first and the
/// depth-aware scan second.
fn find_field_value_raw<'a>(line: &'a [u8], field: &str) -> Option<&'a [u8]> {
    find_field_value_memmem(line, field).or_else(|| find_field_value_scan(line, field))
}
/// Same lookup as `find_field_value_raw`, but reuses a caller-supplied finder
/// (and its pattern length) for the substring search so the pattern is not
/// rebuilt per line.
fn find_field_value_raw_prebuilt<'a>(
    line: &'a [u8],
    field: &str,
    finder: &memchr::memmem::Finder<'_>,
    pattern_len: usize,
) -> Option<&'a [u8]> {
    find_field_value_with_finder(line, finder, pattern_len)
        .or_else(|| find_field_value_scan(line, field))
}
/// Returns the raw bytes of the JSON value that begins at `line[start]`.
///
/// * Strings run to the closing unescaped quote (quotes included).
/// * Objects and arrays run to the matching closing bracket, skipping over
///   string contents.
/// * Anything else (number, `true`, `false`, `null`) runs to the next `,`,
///   `}`, `]` or whitespace byte, or to the end of the line.
///
/// Returns `None` when `start` is out of range, when a string or container is
/// unterminated, or when a container is closed by the wrong kind of bracket.
fn extract_json_value(line: &[u8], start: usize) -> Option<&[u8]> {
    let len = line.len();
    let first = *line.get(start)?;
    match first {
        b'"' => {
            let mut i = start + 1;
            while i < len {
                match line[i] {
                    // Skip the escaped byte as well.
                    b'\\' => i += 2,
                    b'"' => return Some(&line[start..=i]),
                    _ => i += 1,
                }
            }
            None
        }
        b'{' | b'[' => {
            let close = if first == b'{' { b'}' } else { b']' };
            let mut depth: u32 = 1;
            let mut in_str = false;
            let mut i = start + 1;
            while i < len {
                let b = line[i];
                if in_str {
                    if b == b'\\' {
                        i += 2;
                        continue;
                    }
                    if b == b'"' {
                        in_str = false;
                    }
                    i += 1;
                    continue;
                }
                match b {
                    b'"' => in_str = true,
                    b'{' | b'[' => depth += 1,
                    b'}' | b']' => {
                        depth -= 1;
                        if depth == 0 {
                            // The outermost container just closed. Stop here
                            // either way: scanning on after a mismatched
                            // closer would underflow `depth`.
                            return (b == close).then(|| &line[start..=i]);
                        }
                    }
                    _ => {}
                }
                i += 1;
            }
            None
        }
        _ => {
            let scalar_len = line[start..]
                .iter()
                .position(|b| matches!(b, b',' | b'}' | b']' | b' ' | b'\t' | b'\r' | b'\n'))
                .unwrap_or(len - start);
            Some(&line[start..start + scalar_len])
        }
    }
}
fn find_field_chain_raw<'a>(line: &'a [u8], fields: &[String]) -> Option<&'a [u8]> {
let mut current = line;
for (i, field) in fields.iter().enumerate() {
let value = find_field_value_raw(current, field)?;
if i + 1 < fields.len() {
if value.first() != Some(&b'{') {
return None;
}
current = value;
} else {
return Some(value);
}
}
None
}
/// Byte-level evaluation of `select(<field chain> <op> <literal>)` on one line.
///
/// Only active in compact output mode. Returns:
/// * `Some(Ok(()))` when the predicate was decided here; a selected line is
///   copied to `output_buf` verbatim and `had_output` is set;
/// * `None` when the caller must fall back to full evaluation: wrong output
///   mode, field not found, undecidable comparison, or a selected line that
///   does not look compact (its second byte is neither `"` nor `}`) and so
///   has to be re-serialised.
///
/// `had_output` and `output_buf` are only touched when the line is actually
/// emitted here, so a fallback starts from a clean state.
#[allow(clippy::too_many_arguments)]
fn process_line_select_eq_raw(
    trimmed: &[u8],
    fields: &[String],
    op: CmpOp,
    literal_bytes: &[u8],
    config: &OutputConfig,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
) -> Option<Result<()>> {
    if config.mode != output::OutputMode::Compact {
        return None;
    }
    let raw = find_field_chain_raw(trimmed, fields)?;
    if !evaluate_select_predicate(raw, literal_bytes, op)? {
        return Some(Ok(()));
    }
    // Passing the input through is only valid if it already looks compact.
    if trimmed.len() > 1 && trimmed[0] == b'{' && trimmed[1] != b'"' && trimmed[1] != b'}' {
        return None;
    }
    *had_output = true;
    output_buf.extend_from_slice(trimmed);
    write_line_terminator(output_buf, config);
    Some(Ok(()))
}
/// Raw-byte fast path for a `select` whose predicate is several comparisons
/// joined by a single boolean operator.
///
/// Each condition's field chain is looked up with `find_field_chain_raw`; a
/// missing field is treated as the literal `null`. On a match `trimmed` is
/// copied verbatim into `output_buf` followed by the line terminator.
///
/// * `BoolOp::And`: the first false condition filters the line out; the first
///   undecidable condition declines the line.
/// * `BoolOp::Or`: the first true condition matches; if none is true and at
///   least one was undecidable, the line is declined.
///
/// Returns `Some(Ok(()))` when the line was handled and `None` when this path
/// declines it (non-`Compact` mode, undecidable predicate, or a matched line
/// that starts with `{` followed by a byte other than `"` or `}`).
/// `had_output` is set only when bytes are actually written.
#[allow(clippy::too_many_arguments)]
fn process_line_select_compound_raw(
    trimmed: &[u8],
    conditions: &[(Vec<String>, CmpOp, Vec<u8>)],
    bool_op: BoolOp,
    config: &OutputConfig,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
) -> Option<Result<()>> {
    if config.mode != output::OutputMode::Compact {
        return None;
    }
    let matched = match bool_op {
        BoolOp::And => {
            for (fields, op, literal_bytes) in conditions {
                let raw = find_field_chain_raw(trimmed, fields).unwrap_or(b"null");
                // `?` declines the line when the comparison cannot be decided.
                if !evaluate_select_predicate(raw, literal_bytes, *op)? {
                    return Some(Ok(()));
                }
            }
            true
        }
        BoolOp::Or => {
            let mut any_true = false;
            let mut any_ambiguous = false;
            for (fields, op, literal_bytes) in conditions {
                let raw = find_field_chain_raw(trimmed, fields).unwrap_or(b"null");
                match evaluate_select_predicate(raw, literal_bytes, *op) {
                    Some(true) => {
                        any_true = true;
                        break;
                    }
                    Some(false) => {}
                    None => any_ambiguous = true,
                }
            }
            // A definite match wins over earlier ambiguity; without one,
            // ambiguity means the result is unknown.
            if !any_true && any_ambiguous {
                return None;
            }
            any_true
        }
    };
    if !matched {
        return Some(Ok(()));
    }
    // An object whose second byte is neither a key quote nor `}` is not
    // copied verbatim; decline before touching any output state.
    if matches!(trimmed, [b'{', second, ..] if *second != b'"' && *second != b'}') {
        return None;
    }
    *had_output = true;
    output_buf.extend_from_slice(trimmed);
    write_line_terminator(output_buf, config);
    Some(Ok(()))
}
/// Applies a single-field `select` comparison to every line of `chunk` in one
/// pass, reusing one substring finder for the field pattern.
///
/// Lines are split on `\n` and trimmed of spaces, tabs and CRs. Empty lines
/// and lines that do not start with `{` are skipped. A missing field is
/// compared as the literal `null`. Matching lines are copied verbatim into
/// `output_buf` followed by the line terminator, and `had_output` is set.
///
/// Returns `None` when the path does not apply: the mode is not `Compact`,
/// `fields` is not exactly one name, a line starts with `{` followed by a
/// byte other than `"` or `}`, or a comparison cannot be decided. Output
/// written for earlier lines is left in `output_buf` in that case.
fn process_chunk_select_eq_fused(
    chunk: &[u8],
    fields: &[String],
    op: CmpOp,
    literal_bytes: &[u8],
    config: &OutputConfig,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
) -> Option<Result<()>> {
    let [field] = fields else {
        return None;
    };
    if config.mode != output::OutputMode::Compact {
        return None;
    }
    let is_blank = |b: &u8| matches!(*b, b' ' | b'\t' | b'\r');
    let pattern = build_field_pattern(field);
    let finder = memchr::memmem::Finder::new(&pattern);
    let total = chunk.len();
    let mut cursor: usize = 0;
    while cursor < total {
        let line_end = memchr::memchr(b'\n', &chunk[cursor..]).map_or(total, |off| cursor + off);
        let raw_line = &chunk[cursor..line_end];
        cursor = line_end + 1;
        // Whitespace-only lines have no first non-blank byte.
        let Some(first) = raw_line.iter().position(|b| !is_blank(b)) else {
            continue;
        };
        let last = raw_line
            .iter()
            .rposition(|b| !is_blank(b))
            .unwrap_or(first);
        let trimmed = &raw_line[first..=last];
        if trimmed[0] != b'{' {
            continue;
        }
        if let Some(&second) = trimmed.get(1) {
            if second != b'"' && second != b'}' {
                return None;
            }
        }
        let raw = find_field_value_raw_prebuilt(trimmed, field, &finder, pattern.len())
            .unwrap_or(b"null");
        if evaluate_select_predicate(raw, literal_bytes, op)? {
            *had_output = true;
            output_buf.extend_from_slice(trimmed);
            write_line_terminator(output_buf, config);
        }
    }
    Some(Ok(()))
}
/// Applies a compound (`and`/`or`) `select` predicate to every line of
/// `chunk` in one pass, with one prebuilt substring finder per condition.
///
/// Lines are split on `\n` and trimmed of spaces, tabs and CRs. Empty lines
/// and lines that do not start with `{` are skipped. A missing field is
/// compared as the literal `null`. Matching lines are copied verbatim into
/// `output_buf` followed by the line terminator, and `had_output` is set.
///
/// Returns `None` when the path does not apply: the mode is not `Compact`,
/// some condition does not use exactly one field name, a line starts with `{`
/// followed by a byte other than `"` or `}`, or the predicate cannot be
/// decided for a line. Output written for earlier lines is left in
/// `output_buf` in that case.
fn process_chunk_select_compound_fused(
    chunk: &[u8],
    conditions: &[(Vec<String>, CmpOp, Vec<u8>)],
    bool_op: BoolOp,
    config: &OutputConfig,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
) -> Option<Result<()>> {
    if config.mode != output::OutputMode::Compact {
        return None;
    }
    if conditions.iter().any(|(fields, _, _)| fields.len() != 1) {
        return None;
    }
    let patterns: Vec<Vec<u8>> = conditions
        .iter()
        .map(|(fields, _, _)| build_field_pattern(&fields[0]))
        .collect();
    let finders: Vec<memchr::memmem::Finder<'_>> =
        patterns.iter().map(memchr::memmem::Finder::new).collect();
    // Evaluates condition `idx` against one trimmed line.
    let probe = |line: &[u8], idx: usize| -> Option<bool> {
        let (fields, op, literal_bytes) = &conditions[idx];
        let raw =
            find_field_value_raw_prebuilt(line, &fields[0], &finders[idx], patterns[idx].len())
                .unwrap_or(b"null");
        evaluate_select_predicate(raw, literal_bytes, *op)
    };
    let is_blank = |b: &u8| matches!(*b, b' ' | b'\t' | b'\r');
    let total = chunk.len();
    let mut cursor: usize = 0;
    while cursor < total {
        let line_end = memchr::memchr(b'\n', &chunk[cursor..]).map_or(total, |off| cursor + off);
        let raw_line = &chunk[cursor..line_end];
        cursor = line_end + 1;
        // Whitespace-only lines have no first non-blank byte.
        let Some(first) = raw_line.iter().position(|b| !is_blank(b)) else {
            continue;
        };
        let last = raw_line
            .iter()
            .rposition(|b| !is_blank(b))
            .unwrap_or(first);
        let trimmed = &raw_line[first..=last];
        if trimmed[0] != b'{' {
            continue;
        }
        if let Some(&second) = trimmed.get(1) {
            if second != b'"' && second != b'}' {
                return None;
            }
        }
        let matched = match bool_op {
            BoolOp::And => {
                let mut verdict = true;
                for idx in 0..conditions.len() {
                    match probe(trimmed, idx) {
                        Some(true) => {}
                        Some(false) => {
                            verdict = false;
                            break;
                        }
                        None => return None,
                    }
                }
                verdict
            }
            BoolOp::Or => {
                let mut verdict = false;
                let mut saw_ambiguous = false;
                for idx in 0..conditions.len() {
                    match probe(trimmed, idx) {
                        Some(true) => {
                            verdict = true;
                            break;
                        }
                        Some(false) => {}
                        None => saw_ambiguous = true,
                    }
                }
                // A definite match wins; otherwise ambiguity leaves the result unknown.
                if saw_ambiguous && !verdict {
                    return None;
                }
                verdict
            }
        };
        if matched {
            *had_output = true;
            output_buf.extend_from_slice(trimmed);
            write_line_terminator(output_buf, config);
        }
    }
    Some(Ok(()))
}
/// Handles one line for a `select` comparison using the DOM parser's raw
/// field lookup.
///
/// The field's raw bytes are compared with `evaluate_select_predicate`. A
/// match is emitted through `emit_select_match`, a non-match produces
/// nothing, and an undecidable comparison parses the whole line and evaluates
/// `filter` on it, writing each result with `output::write_value`.
///
/// # Errors
/// Fails when the field cannot be extracted, the line cannot be parsed, or
/// emitting the match fails.
#[allow(clippy::too_many_arguments)]
fn process_line_select_eq(
    trimmed: &[u8],
    fields: &[String],
    op: CmpOp,
    literal_bytes: &[u8],
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let path: Vec<&str> = fields.iter().map(String::as_str).collect();
    let raw = dp
        .find_field_raw(prepare_padded(trimmed, scratch), trimmed.len(), &path)
        .context("failed to extract field from NDJSON line")?;
    let Some(matched) = evaluate_select_predicate(&raw, literal_bytes, op) else {
        // Undecidable on raw bytes: evaluate the full filter on the parsed value.
        let padded = prepare_padded(trimmed, scratch);
        let value = simdjson::dom_parse_to_value(padded, trimmed.len())
            .context("failed to parse NDJSON line")?;
        crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
            *had_output = true;
            output::write_value(output_buf, &v, config).ok();
        });
        return Ok(());
    };
    if matched {
        *had_output = true;
        emit_select_match(trimmed, filter, config, env, output_buf, scratch)?;
    }
    Ok(())
}
/// Recognizes `<field chain> | <builtin>("literal")` where the builtin is
/// `test`, `startswith`, `endswith` or `contains`.
///
/// Returns the non-empty field chain and the matching [`StringPred`], or
/// `None` when the filter has any other shape or a `test` pattern fails to
/// compile as a regex.
fn try_field_string_pred(filter: &Filter) -> Option<(Vec<String>, StringPred)> {
    let Filter::Pipe(lhs, rhs) = filter else {
        return None;
    };
    let mut fields = Vec::new();
    if !crate::filter::collect_field_chain(lhs.as_ref(), &mut fields) || fields.is_empty() {
        return None;
    }
    let Filter::Builtin(name, args) = rhs.as_ref() else {
        return None;
    };
    // Exactly one argument, and it must be a string literal.
    let [Filter::Literal(crate::value::Value::String(needle))] = args.as_slice() else {
        return None;
    };
    let pred = match name.as_str() {
        "test" => StringPred::Test(Regex::new(needle).ok()?),
        "startswith" => StringPred::StartsWith(needle.clone()),
        "endswith" => StringPred::EndsWith(needle.clone()),
        "contains" => StringPred::Contains(needle.clone()),
        _ => return None,
    };
    Some((fields, pred))
}
/// Detects the string-predicate `select` fast paths.
///
/// * `select(<field string predicate>)` yields `SelectStringPred`.
/// * `select(<field string predicate>) | <non-empty field chain>` yields
///   `SelectStringPredField`.
///
/// Any other filter shape yields `None`.
fn detect_select_string_pred_fast_path(filter: &Filter) -> Option<NdjsonFastPath> {
    match filter {
        Filter::Select(inner) => {
            let (fields, pred) = try_field_string_pred(inner.as_ref())?;
            Some(NdjsonFastPath::SelectStringPred { fields, pred })
        }
        Filter::Pipe(lhs, rhs) => {
            let Filter::Select(inner) = lhs.as_ref() else {
                return None;
            };
            let (pred_fields, pred) = try_field_string_pred(inner.as_ref())?;
            let mut out_fields = Vec::new();
            let has_output_chain =
                crate::filter::collect_field_chain(rhs, &mut out_fields) && !out_fields.is_empty();
            if has_output_chain {
                Some(NdjsonFastPath::SelectStringPredField {
                    pred_fields,
                    pred,
                    out_fields,
                })
            } else {
                None
            }
        }
        _ => None,
    }
}
/// Evaluates `pred` against a raw JSON string value (quotes included).
///
/// Returns `None` when `raw` is not wrapped in double quotes or its contents
/// are not valid UTF-8; otherwise `Some` with the predicate's result. When
/// the contents contain a backslash they are first passed through
/// `unescape_json_string`.
fn evaluate_string_predicate(raw: &[u8], pred: &StringPred) -> Option<bool> {
    let inner = match raw {
        [b'"', middle @ .., b'"'] => middle,
        _ => return None,
    };
    // Only allocate when there is an escape sequence to decode.
    let unescaped: Vec<u8>;
    let text_bytes: &[u8] = if inner.contains(&b'\\') {
        let mut buf = Vec::with_capacity(inner.len());
        unescape_json_string(inner, &mut buf);
        unescaped = buf;
        &unescaped
    } else {
        inner
    };
    let s = std::str::from_utf8(text_bytes).ok()?;
    let verdict = match pred {
        StringPred::Test(re) => re.is_match(s),
        StringPred::StartsWith(arg) => s.starts_with(arg.as_str()),
        StringPred::EndsWith(arg) => s.ends_with(arg.as_str()),
        StringPred::Contains(arg) => s.contains(arg.as_str()),
    };
    Some(verdict)
}
/// Handles one line for a `select` with a string predicate on a field.
///
/// The field's raw bytes are checked with `evaluate_string_predicate`. A
/// match is emitted through `emit_select_match`, a non-match produces
/// nothing, and an undecidable result parses the whole line and evaluates
/// `filter` on it, writing each result with `output::write_value`.
///
/// # Errors
/// Fails when the field cannot be extracted, the line cannot be parsed, or
/// emitting the match fails.
#[allow(clippy::too_many_arguments)]
fn process_line_select_string_pred(
    trimmed: &[u8],
    fields: &[String],
    pred: &StringPred,
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let path: Vec<&str> = fields.iter().map(String::as_str).collect();
    let raw = dp
        .find_field_raw(prepare_padded(trimmed, scratch), trimmed.len(), &path)
        .context("failed to extract field from NDJSON line")?;
    let Some(matched) = evaluate_string_predicate(&raw, pred) else {
        // Undecidable on raw bytes: evaluate the full filter on the parsed value.
        let padded = prepare_padded(trimmed, scratch);
        let value = simdjson::dom_parse_to_value(padded, trimmed.len())
            .context("failed to parse NDJSON line")?;
        crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
            *had_output = true;
            output::write_value(output_buf, &v, config).ok();
        });
        return Ok(());
    };
    if matched {
        *had_output = true;
        emit_select_match(trimmed, filter, config, env, output_buf, scratch)?;
    }
    Ok(())
}
/// Writes a line that matched a `select` predicate.
///
/// In `Compact` mode the line is minified with `simdjson::minify` and
/// appended to `output_buf` with a line terminator. In every other mode the
/// line is parsed and `filter` is evaluated on it, each result being written
/// with `output::write_value`.
///
/// # Errors
/// Fails when minifying or parsing the line fails.
fn emit_select_match(
    trimmed: &[u8],
    filter: &Filter,
    config: &OutputConfig,
    env: &crate::filter::Env,
    output_buf: &mut Vec<u8>,
    scratch: &mut Vec<u8>,
) -> Result<()> {
    let padded = prepare_padded(trimmed, scratch);
    if config.mode == output::OutputMode::Compact {
        let minified =
            simdjson::minify(padded, trimmed.len()).context("failed to minify matched line")?;
        output_buf.extend_from_slice(&minified);
        write_line_terminator(output_buf, config);
        return Ok(());
    }
    let value = simdjson::dom_parse_to_value(padded, trimmed.len())
        .context("failed to parse NDJSON line")?;
    crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
        output::write_value(output_buf, &v, config).ok();
    });
    Ok(())
}
/// Handles one line for `select(<string predicate on a field>) | <field chain>`.
///
/// When the predicate holds, the output field's raw bytes are extracted and
/// written with `emit_raw_field` plus a line terminator. A non-match produces
/// nothing. An undecidable predicate parses the whole line and evaluates
/// `filter` on it, writing each result with `output::write_value`.
///
/// # Errors
/// Fails when either field cannot be extracted or the line cannot be parsed.
#[allow(clippy::too_many_arguments)]
fn process_line_select_string_pred_field(
    trimmed: &[u8],
    pred_fields: &[String],
    pred: &StringPred,
    out_fields: &[String],
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let pred_path: Vec<&str> = pred_fields.iter().map(String::as_str).collect();
    let raw_pred = dp
        .find_field_raw(prepare_padded(trimmed, scratch), trimmed.len(), &pred_path)
        .context("failed to extract predicate field from NDJSON line")?;
    let Some(matched) = evaluate_string_predicate(&raw_pred, pred) else {
        // Undecidable on raw bytes: evaluate the full filter on the parsed value.
        let padded = prepare_padded(trimmed, scratch);
        let value = simdjson::dom_parse_to_value(padded, trimmed.len())
            .context("failed to parse NDJSON line")?;
        crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
            *had_output = true;
            output::write_value(output_buf, &v, config).ok();
        });
        return Ok(());
    };
    if !matched {
        return Ok(());
    }
    let out_path: Vec<&str> = out_fields.iter().map(String::as_str).collect();
    let raw_out = dp
        .find_field_raw(prepare_padded(trimmed, scratch), trimmed.len(), &out_path)
        .context("failed to extract output field from NDJSON line")?;
    *had_output = true;
    emit_raw_field(output_buf, &raw_out, config);
    write_line_terminator(output_buf, config);
    Ok(())
}
/// Detects the `length`, `keys`, `keys_unsorted`, `type` and `has("key")`
/// fast paths, either applied to the whole line (empty field chain) or
/// piped from a field chain.
///
/// Returns `None` when the filter matches none of these shapes.
fn detect_length_keys_fast_path(filter: &Filter) -> Option<NdjsonFastPath> {
    // Shape 1: a bare builtin applied to the whole line.
    if let Filter::Builtin(name, args) = filter {
        let root_hit = match (name.as_str(), args.as_slice()) {
            ("length", []) => Some(NdjsonFastPath::Length(vec![])),
            ("keys", []) => Some(NdjsonFastPath::Keys {
                fields: vec![],
                sorted: true,
            }),
            ("keys_unsorted", []) => Some(NdjsonFastPath::Keys {
                fields: vec![],
                sorted: false,
            }),
            ("type", []) => Some(NdjsonFastPath::Type(vec![])),
            ("has", [Filter::Literal(crate::value::Value::String(key))]) => {
                Some(NdjsonFastPath::Has {
                    fields: vec![],
                    key: key.clone(),
                })
            }
            _ => None,
        };
        if root_hit.is_some() {
            return root_hit;
        }
    }
    // Shape 2: whatever `decompose_field_builtin` recognizes, paired with a builtin name.
    if let Some((fields, builtin)) = crate::filter::decompose_field_builtin(filter) {
        let chained_hit = match builtin {
            "length" => Some(NdjsonFastPath::Length(fields)),
            "keys" => Some(NdjsonFastPath::Keys {
                fields,
                sorted: true,
            }),
            "keys_unsorted" => Some(NdjsonFastPath::Keys {
                fields,
                sorted: false,
            }),
            "type" => Some(NdjsonFastPath::Type(fields)),
            _ => None,
        };
        if chained_hit.is_some() {
            return chained_hit;
        }
    }
    // Shape 3: `<field chain> | has("key")`.
    let Filter::Pipe(lhs, rhs) = filter else {
        return None;
    };
    let Filter::Builtin(name, args) = rhs.as_ref() else {
        return None;
    };
    if name.as_str() != "has" {
        return None;
    }
    let [Filter::Literal(crate::value::Value::String(key))] = args.as_slice() else {
        return None;
    };
    let mut fields = Vec::new();
    if !crate::filter::collect_field_chain(lhs, &mut fields) {
        return None;
    }
    Some(NdjsonFastPath::Has {
        fields,
        key: key.clone(),
    })
}
/// Detects `select(<field> <op> <literal>) | <projection>` where the
/// projection is a field chain, an object of field chains, or an array of
/// field chains.
///
/// The comparison is accepted with the field on either side of the operator;
/// the operator itself is stored unchanged. Returns `None` for any other
/// shape.
fn detect_select_extract_fast_path(filter: &Filter) -> Option<NdjsonFastPath> {
    let Filter::Pipe(lhs, rhs) = filter else {
        return None;
    };
    let Filter::Select(select_inner) = lhs.as_ref() else {
        return None;
    };
    let Filter::Compare(cmp_lhs, op, cmp_rhs) = select_inner.as_ref() else {
        return None;
    };
    let (cmp_lhs, cmp_rhs, rhs) = (cmp_lhs.as_ref(), cmp_rhs.as_ref(), rhs.as_ref());
    let op = *op;
    // Try `field op literal` first, then `literal op field`.
    let (pred_fields, literal_bytes) =
        try_field_literal(cmp_lhs, cmp_rhs).or_else(|| try_field_literal(cmp_rhs, cmp_lhs))?;

    let mut out_fields = Vec::new();
    if crate::filter::collect_field_chain(rhs, &mut out_fields) && !out_fields.is_empty() {
        return Some(NdjsonFastPath::SelectEqField {
            pred_fields,
            op,
            literal_bytes,
            out_fields,
        });
    }
    if let Some(entries) = try_multi_field_obj(rhs) {
        return Some(NdjsonFastPath::SelectEqObj {
            pred_fields,
            op,
            literal_bytes,
            entries,
        });
    }
    try_multi_field_arr(rhs).map(|entries| NdjsonFastPath::SelectEqArr {
        pred_fields,
        op,
        literal_bytes,
        entries,
    })
}
/// Detects object or array construction made solely of field chains,
/// preferring the object form when both helpers could apply.
fn detect_multi_field_fast_path(filter: &Filter) -> Option<NdjsonFastPath> {
    try_multi_field_obj(filter)
        .map(|entries| NdjsonFastPath::MultiFieldObj { entries })
        .or_else(|| {
            try_multi_field_arr(filter).map(|entries| NdjsonFastPath::MultiFieldArr { entries })
        })
}
/// Recognizes a non-empty object construction whose keys are all plain names
/// and whose values are all non-empty field chains.
///
/// Returns, per entry, the key encoded by `json_key_bytes` and the field
/// chain of its value; `None` if any entry does not fit.
fn try_multi_field_obj(filter: &Filter) -> Option<Vec<(Vec<u8>, Vec<String>)>> {
    let Filter::ObjectConstruct(pairs) = filter else {
        return None;
    };
    if pairs.is_empty() {
        return None;
    }
    // Collecting into `Option<Vec<_>>` stops at the first entry that does not fit.
    pairs
        .iter()
        .map(|(key, val_filter)| {
            let crate::filter::ObjKey::Name(key_name) = key else {
                return None;
            };
            let mut fields = Vec::new();
            let is_chain =
                crate::filter::collect_field_chain(val_filter, &mut fields) && !fields.is_empty();
            if is_chain {
                Some((json_key_bytes(key_name), fields))
            } else {
                None
            }
        })
        .collect()
}
/// Recognizes an array construction whose contents are either a single
/// non-empty field chain or a non-empty comma list of non-empty field chains.
///
/// Returns one field chain per array element, or `None` if any element does
/// not fit.
fn try_multi_field_arr(filter: &Filter) -> Option<Vec<Vec<String>>> {
    // Extracts a non-empty field chain from one array element.
    fn chain_of(item: &Filter) -> Option<Vec<String>> {
        let mut fields = Vec::new();
        if crate::filter::collect_field_chain(item, &mut fields) && !fields.is_empty() {
            Some(fields)
        } else {
            None
        }
    }

    let Filter::ArrayConstruct(inner) = filter else {
        return None;
    };
    match inner.as_ref() {
        Filter::Comma(items) => {
            if items.is_empty() {
                return None;
            }
            // Collecting into `Option<Vec<_>>` stops at the first element that does not fit.
            items.iter().map(|item| chain_of(item)).collect()
        }
        single => chain_of(single).map(|fields| vec![fields]),
    }
}
/// Handles one line for the `length` fast path.
///
/// When `dp.field_length` produces a result it is written as-is followed by a
/// line terminator. Otherwise the line is parsed and `filter` is evaluated on
/// it, each result being written with `output::write_value`.
///
/// # Errors
/// Fails when the parser call fails or the line cannot be parsed.
#[allow(clippy::too_many_arguments)]
fn process_line_length(
    trimmed: &[u8],
    fields: &[String],
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let path: Vec<&str> = fields.iter().map(String::as_str).collect();
    let fast = dp.field_length(prepare_padded(trimmed, scratch), trimmed.len(), &path)?;
    if let Some(rendered) = fast {
        *had_output = true;
        output_buf.extend_from_slice(&rendered);
        write_line_terminator(output_buf, config);
        return Ok(());
    }
    // No fast answer: evaluate the full filter on the parsed value.
    let padded = prepare_padded(trimmed, scratch);
    let value = simdjson::dom_parse_to_value(padded, trimmed.len())
        .context("failed to parse NDJSON line")?;
    crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
        *had_output = true;
        output::write_value(output_buf, &v, config).ok();
    });
    Ok(())
}
/// Handles one line for the `keys` / `keys_unsorted` fast path.
///
/// When `dp.field_keys` produces a result it is written as-is followed by a
/// line terminator; `sorted` is forwarded to the parser. Otherwise the line
/// is parsed and `filter` is evaluated on it, each result being written with
/// `output::write_value`.
///
/// # Errors
/// Fails when the parser call fails or the line cannot be parsed.
#[allow(clippy::too_many_arguments)]
fn process_line_keys(
    trimmed: &[u8],
    fields: &[String],
    sorted: bool,
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let path: Vec<&str> = fields.iter().map(String::as_str).collect();
    let fast = dp.field_keys(
        prepare_padded(trimmed, scratch),
        trimmed.len(),
        &path,
        sorted,
    )?;
    if let Some(rendered) = fast {
        *had_output = true;
        output_buf.extend_from_slice(&rendered);
        write_line_terminator(output_buf, config);
        return Ok(());
    }
    // No fast answer: evaluate the full filter on the parsed value.
    let padded = prepare_padded(trimmed, scratch);
    let value = simdjson::dom_parse_to_value(padded, trimmed.len())
        .context("failed to parse NDJSON line")?;
    crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
        *had_output = true;
        output::write_value(output_buf, &v, config).ok();
    });
    Ok(())
}
/// Handles one line for the `type` fast path.
///
/// The type is inferred from the first byte of the value: the first
/// non-blank byte of the line when `fields` is empty, otherwise the first
/// byte of the raw field returned by the parser. When that byte is not
/// recognized, the line is parsed and `filter` is evaluated on it, each
/// result being written with `output::write_value`.
///
/// # Errors
/// Fails when the field lookup fails or the line cannot be parsed.
#[allow(clippy::too_many_arguments)]
fn process_line_type(
    trimmed: &[u8],
    fields: &[String],
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    // Maps a value's leading byte to the already-quoted type name.
    fn quoted_type_name(lead: Option<u8>) -> Option<&'static str> {
        match lead? {
            b'{' => Some("\"object\""),
            b'[' => Some("\"array\""),
            b'"' => Some("\"string\""),
            b't' | b'f' => Some("\"boolean\""),
            b'n' => Some("\"null\""),
            b'0'..=b'9' | b'-' => Some("\"number\""),
            _ => None,
        }
    }

    let type_str = if fields.is_empty() {
        let lead = trimmed
            .iter()
            .copied()
            .find(|&b| !matches!(b, b' ' | b'\t' | b'\r'));
        quoted_type_name(lead)
    } else {
        let path: Vec<&str> = fields.iter().map(String::as_str).collect();
        let raw = dp.find_field_raw(prepare_padded(trimmed, scratch), trimmed.len(), &path)?;
        quoted_type_name(raw.first().copied())
    };

    if let Some(name) = type_str {
        *had_output = true;
        output_buf.extend_from_slice(name.as_bytes());
        write_line_terminator(output_buf, config);
        return Ok(());
    }
    // Unrecognized leading byte: evaluate the full filter on the parsed value.
    let padded = prepare_padded(trimmed, scratch);
    let value = simdjson::dom_parse_to_value(padded, trimmed.len())
        .context("failed to parse NDJSON line")?;
    crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
        *had_output = true;
        output::write_value(output_buf, &v, config).ok();
    });
    Ok(())
}
/// Handles one line for the `has("key")` fast path.
///
/// When `dp.field_has` produces an answer, `true` or `false` is written
/// followed by a line terminator. Otherwise the line is parsed and `filter`
/// is evaluated on it, each result being written with `output::write_value`.
///
/// # Errors
/// Fails when the parser call fails or the line cannot be parsed.
#[allow(clippy::too_many_arguments)]
fn process_line_has(
    trimmed: &[u8],
    fields: &[String],
    key: &str,
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let path: Vec<&str> = fields.iter().map(String::as_str).collect();
    let fast = dp.field_has(prepare_padded(trimmed, scratch), trimmed.len(), &path, key)?;
    if let Some(present) = fast {
        let rendered: &[u8] = if present { b"true" } else { b"false" };
        *had_output = true;
        output_buf.extend_from_slice(rendered);
        write_line_terminator(output_buf, config);
        return Ok(());
    }
    // No fast answer: evaluate the full filter on the parsed value.
    let padded = prepare_padded(trimmed, scratch);
    let value = simdjson::dom_parse_to_value(padded, trimmed.len())
        .context("failed to parse NDJSON line")?;
    crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
        *had_output = true;
        output::write_value(output_buf, &v, config).ok();
    });
    Ok(())
}
/// Handles one line for `select(<field> <op> <literal>) | <field chain>`.
///
/// When the comparison holds, the output field's raw bytes are extracted and
/// written with `emit_raw_field` plus a line terminator. A non-match produces
/// nothing. An undecidable comparison parses the whole line and evaluates
/// `filter` on it, writing each result with `output::write_value`.
///
/// # Errors
/// Fails when either field cannot be extracted or the line cannot be parsed.
#[allow(clippy::too_many_arguments)]
fn process_line_select_eq_field(
    trimmed: &[u8],
    pred_fields: &[String],
    op: CmpOp,
    literal_bytes: &[u8],
    out_fields: &[String],
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let pred_path: Vec<&str> = pred_fields.iter().map(String::as_str).collect();
    let raw_pred = dp
        .find_field_raw(prepare_padded(trimmed, scratch), trimmed.len(), &pred_path)
        .context("failed to extract predicate field from NDJSON line")?;
    let Some(matched) = evaluate_select_predicate(&raw_pred, literal_bytes, op) else {
        // Undecidable on raw bytes: evaluate the full filter on the parsed value.
        let padded = prepare_padded(trimmed, scratch);
        let value = simdjson::dom_parse_to_value(padded, trimmed.len())
            .context("failed to parse NDJSON line")?;
        crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
            *had_output = true;
            output::write_value(output_buf, &v, config).ok();
        });
        return Ok(());
    };
    if !matched {
        return Ok(());
    }
    let out_path: Vec<&str> = out_fields.iter().map(String::as_str).collect();
    let raw_out = dp
        .find_field_raw(prepare_padded(trimmed, scratch), trimmed.len(), &out_path)
        .context("failed to extract output field from NDJSON line")?;
    *had_output = true;
    emit_raw_field(output_buf, &raw_out, config);
    write_line_terminator(output_buf, config);
    Ok(())
}
/// Builds `{key:value,...}` for one line from a batch of raw field lookups.
///
/// Each entry supplies the pre-encoded key bytes and the field chain whose
/// raw value is looked up with `dp.find_fields_raw`. The object is appended
/// to `output_buf` followed by a line terminator, and `had_output` is set.
///
/// # Errors
/// Fails when the batch extraction fails.
fn process_line_multi_field_obj(
    trimmed: &[u8],
    entries: &[(Vec<u8>, Vec<String>)],
    config: &OutputConfig,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let owned_paths: Vec<Vec<&str>> = entries
        .iter()
        .map(|(_, fields)| fields.iter().map(String::as_str).collect())
        .collect();
    let paths: Vec<&[&str]> = owned_paths.iter().map(Vec::as_slice).collect();
    let raw_values = dp
        .find_fields_raw(prepare_padded(trimmed, scratch), trimmed.len(), &paths)
        .context("failed to batch-extract fields for object construction")?;

    output_buf.push(b'{');
    // Empty before the first entry, a comma before every later one.
    let mut separator: &[u8] = b"";
    for (idx, (key_bytes, _)) in entries.iter().enumerate() {
        output_buf.extend_from_slice(separator);
        separator = b",";
        output_buf.extend_from_slice(key_bytes);
        output_buf.push(b':');
        output_buf.extend_from_slice(&raw_values[idx]);
    }
    output_buf.push(b'}');
    *had_output = true;
    write_line_terminator(output_buf, config);
    Ok(())
}
/// Builds `[value,...]` for one line from a batch of raw field lookups.
///
/// Each entry is a field chain whose raw value is looked up with
/// `dp.find_fields_raw`. The array is appended to `output_buf` followed by a
/// line terminator, and `had_output` is set.
///
/// # Errors
/// Fails when the batch extraction fails.
fn process_line_multi_field_arr(
    trimmed: &[u8],
    entries: &[Vec<String>],
    config: &OutputConfig,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let owned_paths: Vec<Vec<&str>> = entries
        .iter()
        .map(|fields| fields.iter().map(String::as_str).collect())
        .collect();
    let paths: Vec<&[&str]> = owned_paths.iter().map(Vec::as_slice).collect();
    let raw_values = dp
        .find_fields_raw(prepare_padded(trimmed, scratch), trimmed.len(), &paths)
        .context("failed to batch-extract fields for array construction")?;

    output_buf.push(b'[');
    // Empty before the first element, a comma before every later one.
    let mut separator: &[u8] = b"";
    for idx in 0..entries.len() {
        output_buf.extend_from_slice(separator);
        separator = b",";
        output_buf.extend_from_slice(&raw_values[idx]);
    }
    output_buf.push(b']');
    *had_output = true;
    write_line_terminator(output_buf, config);
    Ok(())
}
/// Handles one line for `select(<field> <op> <literal>) | {key: .field, ...}`.
///
/// When the comparison holds, the entries' raw values are batch-extracted and
/// written as `{key:value,...}` plus a line terminator. A non-match produces
/// nothing. An undecidable comparison parses the whole line and evaluates
/// `filter` on it, writing each result with `output::write_value`.
///
/// # Errors
/// Fails when a field extraction fails or the line cannot be parsed.
#[allow(clippy::too_many_arguments)]
fn process_line_select_eq_obj(
    trimmed: &[u8],
    pred_fields: &[String],
    op: CmpOp,
    literal_bytes: &[u8],
    entries: &[(Vec<u8>, Vec<String>)],
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let pred_path: Vec<&str> = pred_fields.iter().map(String::as_str).collect();
    let raw_pred = dp
        .find_field_raw(prepare_padded(trimmed, scratch), trimmed.len(), &pred_path)
        .context("failed to extract predicate field from NDJSON line")?;
    match evaluate_select_predicate(&raw_pred, literal_bytes, op) {
        Some(true) => {}
        Some(false) => return Ok(()),
        None => {
            // Undecidable on raw bytes: evaluate the full filter on the parsed value.
            let padded = prepare_padded(trimmed, scratch);
            let value = simdjson::dom_parse_to_value(padded, trimmed.len())
                .context("failed to parse NDJSON line")?;
            crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
                *had_output = true;
                output::write_value(output_buf, &v, config).ok();
            });
            return Ok(());
        }
    }

    let owned_paths: Vec<Vec<&str>> = entries
        .iter()
        .map(|(_, fields)| fields.iter().map(String::as_str).collect())
        .collect();
    let paths: Vec<&[&str]> = owned_paths.iter().map(Vec::as_slice).collect();
    let raw_values = dp
        .find_fields_raw(prepare_padded(trimmed, scratch), trimmed.len(), &paths)
        .context("failed to batch-extract fields for select+obj")?;

    output_buf.push(b'{');
    // Empty before the first entry, a comma before every later one.
    let mut separator: &[u8] = b"";
    for (idx, (key_bytes, _)) in entries.iter().enumerate() {
        output_buf.extend_from_slice(separator);
        separator = b",";
        output_buf.extend_from_slice(key_bytes);
        output_buf.push(b':');
        output_buf.extend_from_slice(&raw_values[idx]);
    }
    output_buf.push(b'}');
    *had_output = true;
    write_line_terminator(output_buf, config);
    Ok(())
}
/// Handles one line for `select(<field> <op> <literal>) | [.field, ...]`.
///
/// When the comparison holds, the entries' raw values are batch-extracted and
/// written as `[value,...]` plus a line terminator. A non-match produces
/// nothing. An undecidable comparison parses the whole line and evaluates
/// `filter` on it, writing each result with `output::write_value`.
///
/// # Errors
/// Fails when a field extraction fails or the line cannot be parsed.
#[allow(clippy::too_many_arguments)]
fn process_line_select_eq_arr(
    trimmed: &[u8],
    pred_fields: &[String],
    op: CmpOp,
    literal_bytes: &[u8],
    entries: &[Vec<String>],
    filter: &Filter,
    config: &OutputConfig,
    env: &Env,
    output_buf: &mut Vec<u8>,
    had_output: &mut bool,
    scratch: &mut Vec<u8>,
    dp: &mut simdjson::DomParser,
) -> Result<()> {
    let pred_path: Vec<&str> = pred_fields.iter().map(String::as_str).collect();
    let raw_pred = dp
        .find_field_raw(prepare_padded(trimmed, scratch), trimmed.len(), &pred_path)
        .context("failed to extract predicate field from NDJSON line")?;
    match evaluate_select_predicate(&raw_pred, literal_bytes, op) {
        Some(true) => {}
        Some(false) => return Ok(()),
        None => {
            // Undecidable on raw bytes: evaluate the full filter on the parsed value.
            let padded = prepare_padded(trimmed, scratch);
            let value = simdjson::dom_parse_to_value(padded, trimmed.len())
                .context("failed to parse NDJSON line")?;
            crate::filter::eval::eval_filter_with_env(filter, &value, env, &mut |v| {
                *had_output = true;
                output::write_value(output_buf, &v, config).ok();
            });
            return Ok(());
        }
    }

    let owned_paths: Vec<Vec<&str>> = entries
        .iter()
        .map(|fields| fields.iter().map(String::as_str).collect())
        .collect();
    let paths: Vec<&[&str]> = owned_paths.iter().map(Vec::as_slice).collect();
    let raw_values = dp
        .find_fields_raw(prepare_padded(trimmed, scratch), trimmed.len(), &paths)
        .context("failed to batch-extract fields for select+arr")?;

    output_buf.push(b'[');
    // Empty before the first element, a comma before every later one.
    let mut separator: &[u8] = b"";
    for idx in 0..entries.len() {
        output_buf.extend_from_slice(separator);
        separator = b",";
        output_buf.extend_from_slice(&raw_values[idx]);
    }
    output_buf.push(b']');
    *had_output = true;
    write_line_terminator(output_buf, config);
    Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn detect_ndjson_objects() {
assert!(is_ndjson(b"{\"a\":1}\n{\"b\":2}\n"));
assert!(is_ndjson(b"{\"a\":1}\n{\"b\":2}"));
}
#[test]
fn detect_ndjson_arrays() {
assert!(is_ndjson(b"[1,2]\n[3,4]\n"));
}
#[test]
fn not_ndjson_single_object() {
assert!(!is_ndjson(b"{\"a\":1}\n"));
}
#[test]
fn not_ndjson_pretty_printed() {
assert!(!is_ndjson(b"{\n \"a\": 1\n}\n"));
}
#[test]
fn not_ndjson_single_line() {
assert!(!is_ndjson(b"{\"a\":1}"));
}
#[test]
fn not_ndjson_empty() {
assert!(!is_ndjson(b""));
}
#[test]
fn split_chunks_basic() {
let data = b"line1\nline2\nline3\n";
let chunks = split_chunks(data, 6);
let total: usize = chunks.iter().map(|c| c.len()).sum();
assert_eq!(total, data.len());
for (i, chunk) in chunks.iter().enumerate() {
if i < chunks.len() - 1 {
assert!(chunk.ends_with(b"\n"));
}
}
}
#[test]
fn split_chunks_single() {
let data = b"line1\n";
let chunks = split_chunks(data, 1024 * 1024);
assert_eq!(chunks.len(), 1);
assert_eq!(chunks[0], data);
}
#[test]
fn split_chunks_empty() {
assert!(split_chunks(b"", 1024).is_empty());
}
#[test]
fn split_chunks_huge_target_size() {
let data = b"line1\nline2\n";
let chunks = split_chunks(data, usize::MAX);
assert_eq!(chunks.len(), 1);
assert_eq!(chunks[0], &data[..]);
}
#[test]
fn split_chunks_no_newlines() {
let data = b"{\"key\":\"value\"}";
let chunks = split_chunks(data, 4);
assert_eq!(chunks.len(), 1);
assert_eq!(chunks[0], data.as_slice());
}
#[test]
fn split_chunks_no_trailing_newline() {
let data = b"line1\nline2\nline3";
let chunks = split_chunks(data, 6);
let total: usize = chunks.iter().map(|c| c.len()).sum();
assert_eq!(total, data.len());
let last = chunks.last().unwrap();
assert!(last.ends_with(b"line3"));
}
#[test]
fn split_chunks_only_newlines() {
let data = b"\n\n\n\n\n";
let chunks = split_chunks(data, 2);
let total: usize = chunks.iter().map(|c| c.len()).sum();
assert_eq!(total, data.len());
}
#[test]
fn split_chunks_oversized_record() {
let large_record = format!("{{\"data\":\"{}\"}}\n", "x".repeat(200));
let data = format!("{{\"a\":1}}\n{}{{\"b\":2}}\n", large_record);
let bytes = data.as_bytes();
let chunks = split_chunks(bytes, 10);
let total: usize = chunks.iter().map(|c| c.len()).sum();
assert_eq!(total, bytes.len());
let reassembled: Vec<u8> = chunks.iter().flat_map(|c| c.iter().copied()).collect();
assert_eq!(reassembled, bytes);
}
#[test]
fn split_chunks_consecutive_newlines() {
let data = b"{\"a\":1}\n\n\n{\"b\":2}\n\n";
let chunks = split_chunks(data, 5);
let reassembled: Vec<u8> = chunks.iter().flat_map(|c| c.iter().copied()).collect();
assert_eq!(reassembled, data.as_slice());
}
#[test]
fn split_chunks_target_size_zero() {
let data = b"a\nb\nc\n";
let chunks = split_chunks(data, 0);
let reassembled: Vec<u8> = chunks.iter().flat_map(|c| c.iter().copied()).collect();
assert_eq!(reassembled, data.as_slice());
for chunk in &chunks {
assert!(chunk.ends_with(b"\n"));
}
}
#[test]
fn split_chunks_escaped_newline_in_json() {
let data = b"{\"msg\":\"hello\\nworld\"}\n{\"msg\":\"foo\"}\n";
let chunks = split_chunks(data, 100);
assert_eq!(chunks.len(), 1);
assert_eq!(chunks[0], data.as_slice());
}
#[test]
fn process_ndjson_escaped_newline_preserved() {
let data = b"{\"msg\":\"hello\\nworld\"}\n";
let filter = crate::filter::parse(".msg").unwrap();
let config = OutputConfig {
mode: crate::output::OutputMode::Compact,
..Default::default()
};
let env = crate::filter::Env::empty();
let (output, had_output, _) = process_ndjson(data, &filter, &config, &env).unwrap();
assert!(had_output);
assert_eq!(String::from_utf8(output).unwrap(), "\"hello\\nworld\"\n");
}
#[test]
fn process_ndjson_whitespace_only() {
let data = b"\n \n\t\n \n";
let filter = crate::filter::parse(".").unwrap();
let config = OutputConfig {
mode: crate::output::OutputMode::Compact,
..Default::default()
};
let env = crate::filter::Env::empty();
let (output, had_output, _) = process_ndjson(data, &filter, &config, &env).unwrap();
assert!(!had_output);
assert!(output.is_empty());
}
#[test]
fn process_ndjson_no_trailing_newline() {
let data = b"{\"a\":1}\n{\"b\":2}";
let filter = crate::filter::parse(".").unwrap();
let config = OutputConfig {
mode: crate::output::OutputMode::Compact,
..Default::default()
};
let env = crate::filter::Env::empty();
let (output, had_output, _) = process_ndjson(data, &filter, &config, &env).unwrap();
assert!(had_output);
assert_eq!(String::from_utf8(output).unwrap(), "{\"a\":1}\n{\"b\":2}\n");
}
#[test]
fn process_ndjson_basic() {
let data = b"{\"name\":\"alice\"}\n{\"name\":\"bob\"}\n";
let filter = crate::filter::parse(".name").unwrap();
let config = OutputConfig {
mode: crate::output::OutputMode::Compact,
..Default::default()
};
let env = crate::filter::Env::empty();
let (output, had_output, _) = process_ndjson(data, &filter, &config, &env).unwrap();
assert!(had_output);
assert_eq!(String::from_utf8(output).unwrap(), "\"alice\"\n\"bob\"\n");
}
/// The identity filter `.` in compact mode reproduces both input records.
#[test]
fn process_ndjson_identity() {
    let program = crate::filter::parse(".").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"a\":1}\n{\"b\":2}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"a\":1}\n{\"b\":2}\n");
}
/// Blank lines between and after records do not appear in the output.
#[test]
fn process_ndjson_empty_lines() {
    let program = crate::filter::parse(".").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"a\":1}\n\n{\"b\":2}\n\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"a\":1}\n{\"b\":2}\n");
}
/// `.name` is detected as a `FieldChain` with the single segment `name`.
#[test]
fn fast_path_detects_field_chain() {
    let program = crate::filter::parse(".name").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::FieldChain(path) => {
            assert_eq!(path, vec!["name"]);
        }
        unexpected => panic!("expected FieldChain, got {:?}", unexpected),
    }
}
/// `.actor.login` is detected as a `FieldChain` with segments `actor`, `login`.
#[test]
fn fast_path_detects_nested_field_chain() {
    let program = crate::filter::parse(".actor.login").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::FieldChain(path) => {
            assert_eq!(path, vec!["actor", "login"]);
        }
        unexpected => panic!("expected FieldChain, got {:?}", unexpected),
    }
}
/// The identity filter `.` is not assigned any fast path.
#[test]
fn fast_path_not_identity() {
    let program = crate::filter::parse(".").unwrap();
    let detected = detect_fast_path(&program);
    assert!(matches!(detected, NdjsonFastPath::None));
}
/// A filter that iterates (`.[] | .name`) is not assigned any fast path.
#[test]
fn fast_path_not_complex_filter() {
    let program = crate::filter::parse(".[] | .name").unwrap();
    let detected = detect_fast_path(&program);
    assert!(matches!(detected, NdjsonFastPath::None));
}
/// `.type` over two records yields both string values, still quoted, in
/// compact mode.
#[test]
fn fast_path_field_extraction_string() {
    let program = crate::filter::parse(".type").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"type\":\"PushEvent\"}\n{\"type\":\"WatchEvent\"}\n";
    let (bytes, emitted, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    assert!(emitted);
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "\"PushEvent\"\n\"WatchEvent\"\n");
}
/// `.count` over two records yields the two numeric values.
#[test]
fn fast_path_field_extraction_number() {
    let program = crate::filter::parse(".count").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"count\":42}\n{\"count\":7}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "42\n7\n");
}
/// `.a.b` reaches into a nested object and yields the inner string values.
#[test]
fn fast_path_field_extraction_nested() {
    let program = crate::filter::parse(".a.b").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"a\":{\"b\":\"deep\"}}\n{\"a\":{\"b\":\"val\"}}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "\"deep\"\n\"val\"\n");
}
/// A record without the requested field yields `null` for that line.
#[test]
fn fast_path_missing_field_returns_null() {
    let program = crate::filter::parse(".name").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"name\":\"alice\"}\n{\"age\":30}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "\"alice\"\nnull\n");
}
/// With `OutputMode::Raw`, string results are written without surrounding
/// quotes.
#[test]
fn fast_path_raw_output_unquotes_strings() {
    let program = crate::filter::parse(".name").unwrap();
    let raw_mode = OutputConfig {
        mode: crate::output::OutputMode::Raw,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"name\":\"alice\"}\n{\"name\":\"bob\"}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &raw_mode, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "alice\nbob\n");
}
/// With `OutputMode::Raw`, a numeric result is written as-is and a record
/// lacking the field yields `null`.
#[test]
fn fast_path_raw_output_non_string_passes_through() {
    let program = crate::filter::parse(".count").unwrap();
    let raw_mode = OutputConfig {
        mode: crate::output::OutputMode::Raw,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"count\":42}\n{\"active\":true}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &raw_mode, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "42\nnull\n");
}
/// Text without any escape sequences is copied to the output unchanged.
#[test]
fn unescape_basic() {
    let mut decoded = Vec::new();
    let raw = b"hello world";
    unescape_json_string(raw, &mut decoded);
    assert_eq!(decoded, b"hello world");
}
/// The escapes `\n`, `\t`, `\\` and `\"` are each decoded to the byte they
/// stand for.
#[test]
fn unescape_backslash_sequences() {
    let mut decoded = Vec::new();
    let raw = br#"line1\nline2\ttab\\back\"quote"#;
    unescape_json_string(raw, &mut decoded);
    assert_eq!(decoded, b"line1\nline2\ttab\\back\"quote");
}
/// Five `\uXXXX` escapes for ASCII code points decode to `Hello`.
#[test]
fn unescape_unicode() {
    let mut decoded = Vec::new();
    let raw = br#"\u0048\u0065\u006C\u006C\u006F"#;
    unescape_json_string(raw, &mut decoded);
    assert_eq!(decoded, b"Hello");
}
/// A `\uD83C\uDF0D` surrogate pair decodes to valid UTF-8 for U+1F30D.
#[test]
fn unescape_surrogate_pair() {
    let mut decoded = Vec::new();
    let raw = br#"\uD83C\uDF0D"#;
    unescape_json_string(raw, &mut decoded);
    let text = String::from_utf8(decoded).unwrap();
    assert_eq!(text, "\u{1F30D}");
}
/// `select(.type == "PushEvent")` is detected as `SelectEq` on field `type`
/// with operator `Eq` and the quoted string as literal bytes.
#[test]
fn fast_path_detects_select_eq_string() {
    let program = crate::filter::parse("select(.type == \"PushEvent\")").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEq {
            fields: path,
            op: cmp,
            literal_bytes: literal,
        } => {
            assert_eq!(path, vec!["type"]);
            assert_eq!(cmp, CmpOp::Eq);
            assert_eq!(literal, b"\"PushEvent\"");
        }
        unexpected => panic!("expected SelectEq, got {:?}", unexpected),
    }
}
/// `select(.count == 42)` is detected as `SelectEq` with literal bytes `42`.
#[test]
fn fast_path_detects_select_eq_int() {
    let program = crate::filter::parse("select(.count == 42)").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEq {
            fields: path,
            op: cmp,
            literal_bytes: literal,
        } => {
            assert_eq!(path, vec!["count"]);
            assert_eq!(cmp, CmpOp::Eq);
            assert_eq!(literal, b"42");
        }
        unexpected => panic!("expected SelectEq, got {:?}", unexpected),
    }
}
/// `select(.active == true)` is detected as `SelectEq` with literal bytes
/// `true`.
#[test]
fn fast_path_detects_select_eq_bool() {
    let program = crate::filter::parse("select(.active == true)").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEq {
            fields: path,
            op: cmp,
            literal_bytes: literal,
        } => {
            assert_eq!(path, vec!["active"]);
            assert_eq!(cmp, CmpOp::Eq);
            assert_eq!(literal, b"true");
        }
        unexpected => panic!("expected SelectEq, got {:?}", unexpected),
    }
}
/// `select(.x == null)` is detected as `SelectEq` with literal bytes `null`.
#[test]
fn fast_path_detects_select_eq_null() {
    let program = crate::filter::parse("select(.x == null)").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEq {
            fields: path,
            op: cmp,
            literal_bytes: literal,
        } => {
            assert_eq!(path, vec!["x"]);
            assert_eq!(cmp, CmpOp::Eq);
            assert_eq!(literal, b"null");
        }
        unexpected => panic!("expected SelectEq, got {:?}", unexpected),
    }
}
/// A `!=` comparison is detected as the `SelectEq` variant carrying
/// `CmpOp::Ne`.
#[test]
fn fast_path_detects_select_ne() {
    let program = crate::filter::parse("select(.type != \"PushEvent\")").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEq {
            fields: path,
            op: cmp,
            literal_bytes: literal,
        } => {
            assert_eq!(path, vec!["type"]);
            assert_eq!(cmp, CmpOp::Ne);
            assert_eq!(literal, b"\"PushEvent\"");
        }
        unexpected => panic!("expected SelectEq, got {:?}", unexpected),
    }
}
/// With the literal on the left (`"PushEvent" == .type`) detection still
/// reports field `type`, operator `Eq` and the same literal bytes.
#[test]
fn fast_path_detects_select_reversed_operands() {
    let program = crate::filter::parse("select(\"PushEvent\" == .type)").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEq {
            fields: path,
            op: cmp,
            literal_bytes: literal,
        } => {
            assert_eq!(path, vec!["type"]);
            assert_eq!(cmp, CmpOp::Eq);
            assert_eq!(literal, b"\"PushEvent\"");
        }
        unexpected => panic!("expected SelectEq, got {:?}", unexpected),
    }
}
/// `select(.count > 10)` is detected as `SelectEq` carrying `CmpOp::Gt` and
/// literal bytes `10`.
#[test]
fn fast_path_select_gt_supported() {
    let program = crate::filter::parse("select(.count > 10)").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEq {
            fields: path,
            op: cmp,
            literal_bytes: literal,
        } => {
            assert_eq!(path, vec!["count"]);
            assert_eq!(cmp, CmpOp::Gt);
            assert_eq!(literal, b"10");
        }
        unexpected => panic!("expected SelectEq, got {:?}", unexpected),
    }
}
/// `select(.score < 50)` is detected as `SelectEq` carrying `CmpOp::Lt`.
#[test]
fn fast_path_select_lt_supported() {
    let program = crate::filter::parse("select(.score < 50)").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEq { op: cmp, .. } => {
            assert_eq!(cmp, CmpOp::Lt);
        }
        unexpected => panic!("expected SelectEq, got {:?}", unexpected),
    }
}
/// `select(.n >= 0)` is detected as `SelectEq` carrying `CmpOp::Ge`.
#[test]
fn fast_path_select_ge_supported() {
    let program = crate::filter::parse("select(.n >= 0)").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEq { op: cmp, .. } => {
            assert_eq!(cmp, CmpOp::Ge);
        }
        unexpected => panic!("expected SelectEq, got {:?}", unexpected),
    }
}
/// `select(.n <= 100)` is detected as `SelectEq` carrying `CmpOp::Le`.
#[test]
fn fast_path_select_le_supported() {
    let program = crate::filter::parse("select(.n <= 100)").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEq { op: cmp, .. } => {
            assert_eq!(cmp, CmpOp::Le);
        }
        unexpected => panic!("expected SelectEq, got {:?}", unexpected),
    }
}
/// Comparing two fields (no literal operand) is not assigned any fast path.
#[test]
fn fast_path_select_no_literal_not_supported() {
    let program = crate::filter::parse("select(.a == .b)").unwrap();
    let detected = detect_fast_path(&program);
    assert!(matches!(detected, NdjsonFastPath::None));
}
/// Only the record whose `type` equals `"PushEvent"` is emitted, in full.
#[test]
fn fast_path_select_eq_matching_line() {
    let program = crate::filter::parse("select(.type == \"PushEvent\")").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"type\":\"PushEvent\",\"id\":1}\n{\"type\":\"WatchEvent\",\"id\":2}\n";
    let (bytes, emitted, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    assert!(emitted);
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"type\":\"PushEvent\",\"id\":1}\n");
}
/// Only the record whose `type` differs from `"PushEvent"` is emitted.
#[test]
fn fast_path_select_ne_matching_line() {
    let program = crate::filter::parse("select(.type != \"PushEvent\")").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"type\":\"PushEvent\",\"id\":1}\n{\"type\":\"WatchEvent\",\"id\":2}\n";
    let (bytes, emitted, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    assert!(emitted);
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"type\":\"WatchEvent\",\"id\":2}\n");
}
/// `select(.x == null)` keeps both a record with no `x` field and a record
/// whose `x` is explicitly `null`.
#[test]
fn fast_path_select_eq_missing_field() {
    let program = crate::filter::parse("select(.x == null)").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"a\":1}\n{\"x\":null}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"a\":1}\n{\"x\":null}\n");
}
/// A bare `length` is detected as `Length` with an empty field path.
#[test]
fn fast_path_detects_bare_length() {
    let program = crate::filter::parse("length").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::Length(path) => {
            assert!(path.is_empty());
        }
        unexpected => panic!("expected Length, got {:?}", unexpected),
    }
}
/// A bare `keys` is detected as `Keys` with an empty field path and
/// `sorted` set.
#[test]
fn fast_path_detects_bare_keys() {
    let program = crate::filter::parse("keys").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::Keys {
            fields: path,
            sorted: is_sorted,
        } => {
            assert!(path.is_empty());
            assert!(is_sorted);
        }
        unexpected => panic!("expected Keys, got {:?}", unexpected),
    }
}
/// A bare `keys_unsorted` is detected as `Keys` with an empty field path and
/// `sorted` cleared.
#[test]
fn fast_path_detects_bare_keys_unsorted() {
    let program = crate::filter::parse("keys_unsorted").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::Keys {
            fields: path,
            sorted: is_sorted,
        } => {
            assert!(path.is_empty());
            assert!(!is_sorted);
        }
        unexpected => panic!("expected Keys unsorted, got {:?}", unexpected),
    }
}
/// `.items | length` is detected as `Length` over the path `items`.
#[test]
fn fast_path_detects_field_length() {
    let program = crate::filter::parse(".items | length").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::Length(path) => {
            assert_eq!(path, vec!["items"]);
        }
        unexpected => panic!("expected Length, got {:?}", unexpected),
    }
}
/// `.data | keys` is detected as sorted `Keys` over the path `data`.
#[test]
fn fast_path_detects_field_keys() {
    let program = crate::filter::parse(".data | keys").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::Keys {
            fields: path,
            sorted: is_sorted,
        } => {
            assert_eq!(path, vec!["data"]);
            assert!(is_sorted);
        }
        unexpected => panic!("expected Keys, got {:?}", unexpected),
    }
}
/// A bare `type` is detected as `Type` with an empty field path.
#[test]
fn fast_path_detects_bare_type() {
    let program = crate::filter::parse("type").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::Type(path) => {
            assert!(path.is_empty());
        }
        unexpected => panic!("expected Type, got {:?}", unexpected),
    }
}
/// `.data | type` is detected as `Type` over the path `data`.
#[test]
fn fast_path_detects_field_type() {
    let program = crate::filter::parse(".data | type").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::Type(path) => {
            assert_eq!(path, vec!["data"]);
        }
        unexpected => panic!("expected Type, got {:?}", unexpected),
    }
}
/// A bare `has("name")` is detected as `Has` with an empty field path and
/// key `name`.
#[test]
fn fast_path_detects_bare_has() {
    let program = crate::filter::parse(r#"has("name")"#).unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::Has {
            fields: path,
            key: wanted,
        } => {
            assert!(path.is_empty());
            assert_eq!(wanted, "name");
        }
        unexpected => panic!("expected Has, got {:?}", unexpected),
    }
}
/// `.data | has("name")` is detected as `Has` over the path `data` with key
/// `name`.
#[test]
fn fast_path_detects_field_has() {
    let program = crate::filter::parse(r#".data | has("name")"#).unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::Has {
            fields: path,
            key: wanted,
        } => {
            assert_eq!(path, vec!["data"]);
            assert_eq!(wanted, "name");
        }
        unexpected => panic!("expected Has, got {:?}", unexpected),
    }
}
/// `length` on top-level objects yields the number of keys per record.
#[test]
fn fast_path_length_on_objects() {
    let program = crate::filter::parse("length").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"a\":1,\"b\":2}\n{\"x\":1}\n";
    let (bytes, emitted, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    assert!(emitted);
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "2\n1\n");
}
/// `.items | length` yields the element count of each record's array.
#[test]
fn fast_path_length_on_arrays() {
    let program = crate::filter::parse(".items | length").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"items\":[1,2,3]}\n{\"items\":[4,5]}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "3\n2\n");
}
/// `keys` on objects yields the key names in sorted order (`b`,`a` comes
/// out as `a`,`b`).
#[test]
fn fast_path_keys_on_objects() {
    let program = crate::filter::parse("keys").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"b\":2,\"a\":1}\n{\"x\":1}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "[\"a\",\"b\"]\n[\"x\"]\n");
}
/// When no record satisfies the predicate, nothing is written and no output
/// is reported.
#[test]
fn fast_path_select_no_match_no_output() {
    let program = crate::filter::parse("select(.type == \"PushEvent\")").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"type\":\"WatchEvent\"}\n{\"type\":\"IssuesEvent\"}\n";
    let (bytes, emitted, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    assert!(!emitted);
    assert!(bytes.is_empty());
}
/// When every record satisfies the predicate, all of them are emitted.
#[test]
fn fast_path_select_all_match() {
    let program = crate::filter::parse("select(.type == \"PushEvent\")").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"type\":\"PushEvent\"}\n{\"type\":\"PushEvent\"}\n";
    let (bytes, emitted, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    assert!(emitted);
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"type\":\"PushEvent\"}\n{\"type\":\"PushEvent\"}\n");
}
/// Comparing against the empty string literal keeps only the record whose
/// `name` is `""`.
#[test]
fn fast_path_select_empty_string_literal() {
    let program = crate::filter::parse("select(.name == \"\")").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"name\":\"\"}\n{\"name\":\"bob\"}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"name\":\"\"}\n");
}
/// A predicate on the nested path `.a.b` keeps only the matching record.
#[test]
fn fast_path_select_nested_field() {
    let program = crate::filter::parse("select(.a.b == \"yes\")").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"a\":{\"b\":\"yes\"},\"id\":1}\n{\"a\":{\"b\":\"no\"},\"id\":2}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"a\":{\"b\":\"yes\"},\"id\":1}\n");
}
/// Blank lines interleaved with records do not disturb `select`: only the
/// matching record is emitted.
#[test]
fn fast_path_select_with_empty_lines() {
    let program = crate::filter::parse("select(.type == \"PushEvent\")").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"type\":\"PushEvent\"}\n\n{\"type\":\"WatchEvent\"}\n\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"type\":\"PushEvent\"}\n");
}
/// Comparing against the literal `false` keeps only the record whose
/// `active` is `false`.
#[test]
fn fast_path_select_false_literal() {
    let program = crate::filter::parse("select(.active == false)").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"active\":false}\n{\"active\":true}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"active\":false}\n");
}
/// Comparing against the literal `0` keeps only the record whose `n` is 0.
#[test]
fn fast_path_select_int_zero() {
    let program = crate::filter::parse("select(.n == 0)").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"n\":0}\n{\"n\":1}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"n\":0}\n");
}
/// Comparing against the literal `-1` keeps only the record whose `n` is -1.
#[test]
fn fast_path_select_negative_int() {
    let program = crate::filter::parse("select(.n == -1)").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"n\":-1}\n{\"n\":1}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"n\":-1}\n");
}
/// `length` of an empty object is `0`; a one-key object gives `1`.
#[test]
fn fast_path_length_empty_object() {
    let program = crate::filter::parse("length").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{}\n{\"a\":1}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "0\n1\n");
}
/// `.items | length` of an empty array is `0`; a one-element array gives `1`.
#[test]
fn fast_path_length_empty_array_field() {
    let program = crate::filter::parse(".items | length").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"items\":[]}\n{\"items\":[1]}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "0\n1\n");
}
/// `.name | length` on string values yields the string lengths (5 and 3).
#[test]
fn fast_path_length_string_fallback() {
    let program = crate::filter::parse(".name | length").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"name\":\"alice\"}\n{\"name\":\"bob\"}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "5\n3\n");
}
/// `keys` of an empty object is `[]`; a one-key object lists that key.
#[test]
fn fast_path_keys_empty_object() {
    let program = crate::filter::parse("keys").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{}\n{\"a\":1}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "[]\n[\"a\"]\n");
}
/// `keys` on top-level arrays yields the element indices.
#[test]
fn fast_path_keys_array_fallback() {
    let program = crate::filter::parse("keys").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"[10,20,30]\n[40]\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "[0,1,2]\n[0]\n");
}
/// A blank line between records contributes nothing to `length` output.
#[test]
fn fast_path_length_with_empty_lines() {
    let program = crate::filter::parse("length").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"a\":1}\n\n{\"b\":2,\"c\":3}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "1\n2\n");
}
/// `.a.b | length` yields the element count of the nested array.
#[test]
fn fast_path_nested_field_length() {
    let program = crate::filter::parse(".a.b | length").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"a\":{\"b\":[1,2,3]}}\n{\"a\":{\"b\":[4]}}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "3\n1\n");
}
/// `.meta | keys` yields the sorted key names of the nested object.
#[test]
fn fast_path_nested_field_keys() {
    let program = crate::filter::parse(".meta | keys").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"meta\":{\"b\":2,\"a\":1}}\n{\"meta\":{\"z\":1}}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "[\"a\",\"b\"]\n[\"z\"]\n");
}
/// Each pair mixes two different kinds of JSON value (string, number, null,
/// bool); `bytes_mismatch_is_definitive` must return true for all of them.
#[test]
fn definitive_different_types() {
    let cases: [(&[u8], &[u8]); 5] = [
        (b"\"hello\"", b"42"),
        (b"\"hello\"", b"null"),
        (b"42", b"true"),
        (b"null", b"\"x\""),
        (b"1", b"\"1\""),
    ];
    for &(raw, literal) in &cases {
        assert!(bytes_mismatch_is_definitive(raw, literal));
    }
}
/// `null` against `null ` (trailing space) and the two opposite boolean
/// pairs must all be reported as definitive mismatches.
#[test]
fn definitive_null_and_bools() {
    let null_vs_padded = bytes_mismatch_is_definitive(b"null", b"null ");
    assert!(null_vs_padded);
    let true_vs_false = bytes_mismatch_is_definitive(b"true", b"false");
    assert!(true_vs_false);
    let false_vs_true = bytes_mismatch_is_definitive(b"false", b"true");
    assert!(false_vs_true);
}
/// Pairs of quoted strings without backslashes must be reported as
/// definitive mismatches.
#[test]
fn definitive_plain_strings() {
    let cases: [(&[u8], &[u8]); 3] = [
        (b"\"hello\"", b"\"world\""),
        (b"\"abc\"", b"\"ab\""),
        (b"\"\"", b"\"x\""),
    ];
    for &(raw, literal) in &cases {
        assert!(bytes_mismatch_is_definitive(raw, literal));
    }
}
/// When the raw string contains a `\u` escape, a byte mismatch must not be
/// reported as definitive.
#[test]
fn not_definitive_strings_with_escapes() {
    let escaped_a = bytes_mismatch_is_definitive(b"\"\\u0041\"", b"\"A\"");
    assert!(!escaped_a);
    let escaped_e_acute = bytes_mismatch_is_definitive(b"\"caf\\u00e9\"", b"\"cafe\"");
    assert!(!escaped_e_acute);
}
/// Pairs of differing plain integer literals must be reported as definitive
/// mismatches.
#[test]
fn definitive_plain_integers() {
    let cases: [(&[u8], &[u8]); 3] = [
        (b"42", b"43"),
        (b"-1", b"1"),
        (b"0", b"1"),
    ];
    for &(raw, literal) in &cases {
        assert!(bytes_mismatch_is_definitive(raw, literal));
    }
}
/// A raw number written with a decimal point or exponent must not be
/// reported as a definitive mismatch against an integer literal.
#[test]
fn not_definitive_float_vs_int() {
    let cases: [(&[u8], &[u8]); 4] = [
        (b"1.0", b"1"),
        (b"1e2", b"100"),
        (b"1E2", b"100"),
        (b"42.0", b"42"),
    ];
    for &(raw, literal) in &cases {
        assert!(!bytes_mismatch_is_definitive(raw, literal));
    }
}
/// `select(.n == 1)` keeps the record whose `n` is written `1.0`, and the
/// emitted line retains the `1.0` spelling.
#[test]
fn fast_path_select_float_vs_int_eq() {
    let program = crate::filter::parse("select(.n == 1)").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"n\":1.0,\"id\":\"a\"}\n{\"n\":2,\"id\":\"b\"}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"n\":1.0,\"id\":\"a\"}\n");
}
/// `select(.n != 1)` drops the record whose `n` is written `1.0` and keeps
/// the one whose `n` is `2`.
#[test]
fn fast_path_select_float_vs_int_ne() {
    let program = crate::filter::parse("select(.n != 1)").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"n\":1.0}\n{\"n\":2}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"n\":2}\n");
}
/// `select(.n == 100)` keeps the record whose `n` is written `1e2`, and the
/// emitted line retains the `1e2` spelling.
#[test]
fn fast_path_select_scientific_notation() {
    let program = crate::filter::parse("select(.n == 100)").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"n\":1e2,\"id\":\"a\"}\n{\"n\":99,\"id\":\"b\"}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"n\":1e2,\"id\":\"a\"}\n");
}
/// A value written as the escape `\u0041` matches the literal `"A"`; the
/// expected output line shows the value as a plain `A`.
#[test]
fn fast_path_select_unicode_escape_match() {
    let program = crate::filter::parse("select(.s == \"A\")").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"s\":\"\\u0041\",\"id\":1}\n{\"s\":\"B\",\"id\":2}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"s\":\"A\",\"id\":1}\n");
}
/// The string `"42"` does not satisfy `select(.n == 42)`; only the numeric
/// `42` record is emitted.
#[test]
fn fast_path_select_type_mismatch_no_fallback() {
    let program = crate::filter::parse("select(.n == 42)").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"n\":\"42\"}\n{\"n\":42}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"n\":42}\n");
}
/// A record with no `x` field does not satisfy `select(.x == "hello")`;
/// only the record carrying that value is emitted.
#[test]
fn fast_path_select_missing_field_vs_string() {
    let program = crate::filter::parse("select(.x == \"hello\")").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"a\":1}\n{\"x\":\"hello\"}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"x\":\"hello\"}\n");
}
/// `select(.n == 42)` keeps the record whose `n` is written `42.00`, and the
/// emitted line retains that spelling.
#[test]
fn fast_path_select_trailing_zero_float() {
    let program = crate::filter::parse("select(.n == 42)").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"n\":42.00}\n{\"n\":43}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"n\":42.00}\n");
}
/// Calls `prepare_padded` twice with the same scratch vector, first with a
/// short line and then a longer one. Each result must be at least the line
/// length plus `simdjson::padding()`, and the second result must start with
/// the second line's bytes.
#[test]
fn prepare_padded_reuses_buffer() {
    let mut buf = Vec::new();

    let short = b"short";
    let first = prepare_padded(short, &mut buf);
    assert!(first.len() >= short.len() + crate::simdjson::padding());

    let long = b"a much longer line that should not cause reallocation if scratch is big enough already";
    let second = prepare_padded(long, &mut buf);
    assert!(second.len() >= long.len() + crate::simdjson::padding());
    assert_eq!(&second[..long.len()], long);
}
/// `select(...) | .actor.login` is detected as `SelectEqField` with the
/// predicate on `type` and the output path `actor.login`.
#[test]
fn fast_path_detects_select_eq_field() {
    let program = crate::filter::parse("select(.type == \"PushEvent\") | .actor.login").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEqField {
            pred_fields: pred_path,
            op: cmp,
            literal_bytes: literal,
            out_fields: out_path,
        } => {
            assert_eq!(pred_path, vec!["type"]);
            assert_eq!(cmp, CmpOp::Eq);
            assert_eq!(literal, b"\"PushEvent\"");
            assert_eq!(out_path, vec!["actor", "login"]);
        }
        unexpected => panic!("expected SelectEqField, got {:?}", unexpected),
    }
}
/// A `!=` predicate followed by a field access is detected as
/// `SelectEqField` carrying `CmpOp::Ne`.
#[test]
fn fast_path_detects_select_ne_field() {
    let program = crate::filter::parse("select(.type != \"PushEvent\") | .name").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEqField { op: cmp, .. } => {
            assert_eq!(cmp, CmpOp::Ne);
        }
        unexpected => panic!("expected SelectEqField, got {:?}", unexpected),
    }
}
/// An object construction with explicit `key: .path` pairs is detected as
/// `MultiFieldObj`; each entry holds the quoted key bytes and field path.
#[test]
fn fast_path_detects_multi_field_obj() {
    let program = crate::filter::parse("{type: .type, actor: .actor.login}").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::MultiFieldObj { entries: pairs } => {
            assert_eq!(pairs.len(), 2);
            assert_eq!(pairs[0].0, b"\"type\"");
            assert_eq!(pairs[0].1, vec!["type"]);
            assert_eq!(pairs[1].0, b"\"actor\"");
            assert_eq!(pairs[1].1, vec!["actor", "login"]);
        }
        unexpected => panic!("expected MultiFieldObj, got {:?}", unexpected),
    }
}
/// The shorthand `{type, name}` is detected as `MultiFieldObj` whose entries
/// read the fields `type` and `name`.
#[test]
fn fast_path_detects_multi_field_obj_shorthand() {
    let program = crate::filter::parse("{type, name}").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::MultiFieldObj { entries: pairs } => {
            assert_eq!(pairs.len(), 2);
            assert_eq!(pairs[0].1, vec!["type"]);
            assert_eq!(pairs[1].1, vec!["name"]);
        }
        unexpected => panic!("expected MultiFieldObj, got {:?}", unexpected),
    }
}
/// An object whose key is a computed expression is not assigned any fast
/// path.
#[test]
fn fast_path_not_obj_with_expr_key() {
    let program = crate::filter::parse("{(.key): .value}").unwrap();
    let detected = detect_fast_path(&program);
    assert!(matches!(detected, NdjsonFastPath::None));
}
/// An object whose value is an arithmetic expression is not assigned any
/// fast path.
#[test]
fn fast_path_not_obj_with_complex_value() {
    let program = crate::filter::parse("{total: (.x + .y)}").unwrap();
    let detected = detect_fast_path(&program);
    assert!(matches!(detected, NdjsonFastPath::None));
}
/// An array of field accesses is detected as `MultiFieldArr` with one path
/// per element.
#[test]
fn fast_path_detects_multi_field_arr() {
    let program = crate::filter::parse("[.type, .actor.login]").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::MultiFieldArr { entries: paths } => {
            assert_eq!(paths.len(), 2);
            assert_eq!(paths[0], vec!["type"]);
            assert_eq!(paths[1], vec!["actor", "login"]);
        }
        unexpected => panic!("expected MultiFieldArr, got {:?}", unexpected),
    }
}
/// A one-element array `[.name]` is still detected as `MultiFieldArr`.
#[test]
fn fast_path_detects_single_field_arr() {
    let program = crate::filter::parse("[.name]").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::MultiFieldArr { entries: paths } => {
            assert_eq!(paths.len(), 1);
            assert_eq!(paths[0], vec!["name"]);
        }
        unexpected => panic!("expected MultiFieldArr, got {:?}", unexpected),
    }
}
/// An array construction that iterates (`[.[] | .x]`) is not assigned any
/// fast path.
#[test]
fn fast_path_not_arr_with_iterate() {
    let program = crate::filter::parse("[.[] | .x]").unwrap();
    let detected = detect_fast_path(&program);
    assert!(matches!(detected, NdjsonFastPath::None));
}
/// `select(...)` piped into an object construction is detected as
/// `SelectEqObj` with the predicate on `type` and two object entries.
#[test]
fn fast_path_detects_select_eq_obj() {
    let source = "select(.type == \"PushEvent\") | {type, actor: .actor.login}";
    let program = crate::filter::parse(source).unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEqObj {
            pred_fields: pred_path,
            entries: pairs,
            ..
        } => {
            assert_eq!(pred_path, vec!["type"]);
            assert_eq!(pairs.len(), 2);
        }
        unexpected => panic!("expected SelectEqObj, got {:?}", unexpected),
    }
}
/// `select(...)` piped into an array construction is detected as
/// `SelectEqArr` with the predicate on `type` and two array entries.
#[test]
fn fast_path_detects_select_eq_arr() {
    let program = crate::filter::parse("select(.type == \"PushEvent\") | [.type, .id]").unwrap();
    let detected = detect_fast_path(&program);
    match detected {
        NdjsonFastPath::SelectEqArr {
            pred_fields: pred_path,
            entries: paths,
            ..
        } => {
            assert_eq!(pred_path, vec!["type"]);
            assert_eq!(paths.len(), 2);
        }
        unexpected => panic!("expected SelectEqArr, got {:?}", unexpected),
    }
}
/// For the record that satisfies the predicate, only its `name` value is
/// emitted.
#[test]
fn fast_path_select_eq_field_matching() {
    let program = crate::filter::parse("select(.type == \"PushEvent\") | .name").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"type\":\"PushEvent\",\"name\":\"alice\"}\n{\"type\":\"WatchEvent\",\"name\":\"bob\"}\n";
    let (bytes, emitted, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    assert!(emitted);
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "\"alice\"\n");
}
/// With no record satisfying the predicate, nothing is written and no
/// output is reported.
#[test]
fn fast_path_select_eq_field_no_match() {
    let program = crate::filter::parse("select(.type == \"PushEvent\") | .name").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"type\":\"WatchEvent\",\"name\":\"a\"}\n{\"type\":\"IssuesEvent\",\"name\":\"b\"}\n";
    let (bytes, emitted, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    assert!(!emitted);
    assert!(bytes.is_empty());
}
/// A matching record that lacks the output field yields `null`.
#[test]
fn fast_path_select_eq_field_missing_output() {
    let program = crate::filter::parse("select(.type == \"PushEvent\") | .name").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"type\":\"PushEvent\"}\n{\"type\":\"WatchEvent\",\"name\":\"b\"}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "null\n");
}
/// `select(.n == 1) | .name` matches the record whose `n` is written `1.0`
/// and emits its `name`.
#[test]
fn fast_path_select_eq_field_float_fallback() {
    let program = crate::filter::parse("select(.n == 1) | .name").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"n\":1.0,\"name\":\"a\"}\n{\"n\":2,\"name\":\"b\"}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "\"a\"\n");
}
/// `select(.n > 9)` over 10, 50, 5 keeps 10 and 50.
#[test]
fn fast_path_select_gt_int() {
    let program = crate::filter::parse("select(.n > 9)").unwrap();
    let compact = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..Default::default()
    };
    let env = crate::filter::Env::empty();
    let input = b"{\"n\":10}\n{\"n\":50}\n{\"n\":5}\n";
    let (bytes, _, _) = process_ndjson(input, &program, &compact, &env).unwrap();
    let rendered = String::from_utf8(bytes).unwrap();
    assert_eq!(rendered, "{\"n\":10}\n{\"n\":50}\n");
}
/// `select(.n < 10)` keeps only the line with 5 (10 itself is excluded).
#[test]
fn fast_path_select_lt_int() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.n < 10)").unwrap();
    let input = b"{\"n\":10}\n{\"n\":50}\n{\"n\":5}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"n\":5}\n");
}
/// `select(.n >= 10)` keeps 10 (boundary included) and 50, drops 5.
#[test]
fn fast_path_select_ge_int() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.n >= 10)").unwrap();
    let input = b"{\"n\":10}\n{\"n\":50}\n{\"n\":5}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"n\":10}\n{\"n\":50}\n");
}
/// `select(.n <= 10)` keeps 10 (boundary included) and 5, drops 50.
#[test]
fn fast_path_select_le_int() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.n <= 10)").unwrap();
    let input = b"{\"n\":10}\n{\"n\":50}\n{\"n\":5}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"n\":10}\n{\"n\":5}\n");
}
/// Fractional values compared against the integer literal 3: only 3.14 passes.
#[test]
fn fast_path_select_gt_float() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.n > 3)").unwrap();
    let input = b"{\"n\":3.14}\n{\"n\":2.71}\n{\"n\":1.0}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"n\":3.14}\n");
}
/// String-vs-string ordering: only "cherry" sorts strictly after "banana".
#[test]
fn fast_path_select_gt_string_comparison() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.s > \"banana\")").unwrap();
    let input = b"{\"s\":\"apple\"}\n{\"s\":\"banana\"}\n{\"s\":\"cherry\"}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"s\":\"cherry\"}\n");
}
/// Comparing string values against the number 5 with `>`: both lines are
/// expected to pass through unchanged.
#[test]
fn fast_path_select_gt_non_number_fallback() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.s > 5)").unwrap();
    let input = b"{\"s\":\"hello\"}\n{\"s\":\"world\"}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"s\":\"hello\"}\n{\"s\":\"world\"}\n");
}
/// Ordering predicate followed by a field extraction: only the `n = 20` line
/// survives and its `name` is printed.
#[test]
fn fast_path_select_gt_field_extract() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.n > 10) | .name").unwrap();
    let input = b"{\"n\":20,\"name\":\"a\"}\n{\"n\":5,\"name\":\"b\"}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "\"a\"\n");
}
/// Negative literal on the right-hand side: `-5` is rejected, `0` and `5` pass.
#[test]
fn fast_path_select_gt_negative_numbers() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.n > -1)").unwrap();
    let input = b"{\"n\":-5}\n{\"n\":0}\n{\"n\":5}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"n\":0}\n{\"n\":5}\n");
}
/// Byte-identical operands satisfy `CmpOp::Eq`.
#[test]
fn predicate_eq_match() {
    let verdict = evaluate_select_predicate(b"42", b"42", CmpOp::Eq);
    assert_eq!(verdict, Some(true));
}
/// Different numbers satisfy `CmpOp::Ne`.
#[test]
fn predicate_ne_match() {
    let verdict = evaluate_select_predicate(b"42", b"99", CmpOp::Ne);
    assert_eq!(verdict, Some(true));
}
/// `CmpOp::Gt` on integers: 50 > 10 holds, 5 > 10 does not.
#[test]
fn predicate_gt_numbers() {
    let larger = evaluate_select_predicate(b"50", b"10", CmpOp::Gt);
    assert_eq!(larger, Some(true));

    let smaller = evaluate_select_predicate(b"5", b"10", CmpOp::Gt);
    assert_eq!(smaller, Some(false));
}
/// `CmpOp::Lt` on integers: 5 < 10 holds, 50 < 10 does not.
#[test]
fn predicate_lt_numbers() {
    let smaller = evaluate_select_predicate(b"5", b"10", CmpOp::Lt);
    assert_eq!(smaller, Some(true));

    let larger = evaluate_select_predicate(b"50", b"10", CmpOp::Lt);
    assert_eq!(larger, Some(false));
}
/// `CmpOp::Ge` on integers: equal operands pass, 9 >= 10 fails.
#[test]
fn predicate_ge_numbers() {
    let at_bound = evaluate_select_predicate(b"10", b"10", CmpOp::Ge);
    assert_eq!(at_bound, Some(true));

    let below = evaluate_select_predicate(b"9", b"10", CmpOp::Ge);
    assert_eq!(below, Some(false));
}
/// `CmpOp::Le` on integers: equal operands pass, 11 <= 10 fails.
#[test]
fn predicate_le_numbers() {
    let at_bound = evaluate_select_predicate(b"10", b"10", CmpOp::Le);
    assert_eq!(at_bound, Some(true));

    let above = evaluate_select_predicate(b"11", b"10", CmpOp::Le);
    assert_eq!(above, Some(false));
}
/// `CmpOp::Gt` on quoted strings: "banana" > "apple" holds, the reverse does not.
#[test]
fn predicate_gt_strings() {
    let after = evaluate_select_predicate(b"\"banana\"", b"\"apple\"", CmpOp::Gt);
    assert_eq!(after, Some(true));

    let before = evaluate_select_predicate(b"\"apple\"", b"\"banana\"", CmpOp::Gt);
    assert_eq!(before, Some(false));
}
/// Ordering a string against a number yields `None` rather than a verdict.
#[test]
fn predicate_ordering_different_types_fallback() {
    let verdict = evaluate_select_predicate(b"\"hello\"", b"42", CmpOp::Gt);
    assert_eq!(verdict, None);
}
/// Ordering a string that contains a backslash escape yields `None`.
#[test]
fn predicate_ordering_string_with_escapes_fallback() {
    let verdict = evaluate_select_predicate(b"\"he\\nllo\"", b"\"world\"", CmpOp::Gt);
    assert_eq!(verdict, None);
}
/// `CmpOp::Gt` on fractional numbers: 3.14 > 3.0 holds, 2.5 > 3.0 does not.
#[test]
fn predicate_float_comparison() {
    let above = evaluate_select_predicate(b"3.14", b"3.0", CmpOp::Gt);
    assert_eq!(above, Some(true));

    let below = evaluate_select_predicate(b"2.5", b"3.0", CmpOp::Gt);
    assert_eq!(below, Some(false));
}
/// Object construction `{type, id: .id}` reproduces both fields for each line
/// and reports that output was produced.
#[test]
fn fast_path_multi_field_obj_basic() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("{type, id: .id}").unwrap();
    let input = b"{\"type\":\"PushEvent\",\"id\":1}\n{\"type\":\"WatchEvent\",\"id\":2}\n";
    let (bytes, produced, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    assert!(produced);
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(
        text,
        "{\"type\":\"PushEvent\",\"id\":1}\n{\"type\":\"WatchEvent\",\"id\":2}\n"
    );
}
/// A field absent from the input line appears as `null` in the built object.
#[test]
fn fast_path_multi_field_obj_missing_field() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("{type, id: .id}").unwrap();
    let input = b"{\"type\":\"PushEvent\"}\n{\"type\":\"WatchEvent\",\"id\":2}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(
        text,
        "{\"type\":\"PushEvent\",\"id\":null}\n{\"type\":\"WatchEvent\",\"id\":2}\n"
    );
}
/// Object construction whose values are two-level field paths.
#[test]
fn fast_path_multi_field_obj_nested() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("{actor: .actor.login, repo: .repo.name}").unwrap();
    let input = b"{\"actor\":{\"login\":\"alice\"},\"repo\":{\"name\":\"foo\"}}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"actor\":\"alice\",\"repo\":\"foo\"}\n");
}
/// Array construction `[.x, .y]` emits one two-element array per input line.
#[test]
fn fast_path_multi_field_arr_basic() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("[.x, .y]").unwrap();
    let input = b"{\"x\":1,\"y\":2}\n{\"x\":3,\"y\":4}\n";
    let (bytes, produced, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    assert!(produced);
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "[1,2]\n[3,4]\n");
}
/// A field absent from the input line appears as `null` in the built array.
#[test]
fn fast_path_multi_field_arr_missing_field() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("[.x, .y]").unwrap();
    let input = b"{\"x\":1}\n{\"x\":2,\"y\":3}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "[1,null]\n[2,3]\n");
}
/// Array construction mixing a nested path (`.a.b`) with a top-level field.
#[test]
fn fast_path_multi_field_arr_nested() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("[.a.b, .c]").unwrap();
    let input = b"{\"a\":{\"b\":\"deep\"},\"c\":1}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "[\"deep\",1]\n");
}
/// Equality select piped into object construction: only the type "A" line is
/// projected.
#[test]
fn fast_path_select_eq_obj_basic() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.type == \"A\") | {x: .x}").unwrap();
    let input = b"{\"type\":\"A\",\"x\":1}\n{\"type\":\"B\",\"x\":2}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"x\":1}\n");
}
/// Equality select piped into object construction emits nothing when no line
/// matches.
#[test]
fn fast_path_select_eq_obj_no_match() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.type == \"A\") | {x: .x}").unwrap();
    let input = b"{\"type\":\"B\",\"x\":1}\n{\"type\":\"C\",\"x\":2}\n";
    let (bytes, produced, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    assert!(!produced);
    assert!(bytes.is_empty());
}
/// Equality select piped into array construction: only the type "A" line is
/// projected.
#[test]
fn fast_path_select_eq_arr_basic() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse("select(.type == \"A\") | [.x, .y]").unwrap();
    let input = b"{\"type\":\"A\",\"x\":1,\"y\":2}\n{\"type\":\"B\",\"x\":3,\"y\":4}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "[1,2]\n");
}
/// `select(.msg | test("error"))` is classified as `SelectStringPred` with a
/// `Test` predicate on the `msg` field.
#[test]
fn fast_path_detects_select_test() {
    let parsed = crate::filter::parse(r#"select(.msg | test("error"))"#).unwrap();
    let detected = detect_fast_path(&parsed);
    match detected {
        NdjsonFastPath::SelectStringPred {
            fields: path,
            pred: check,
        } => {
            assert_eq!(path, vec!["msg"]);
            assert!(matches!(check, StringPred::Test(_)));
        }
        unexpected => panic!("expected SelectStringPred, got {:?}", unexpected),
    }
}
/// `select(.url | startswith("/api"))` is classified as `SelectStringPred`
/// carrying the `/api` prefix.
#[test]
fn fast_path_detects_select_startswith() {
    let parsed = crate::filter::parse(r#"select(.url | startswith("/api"))"#).unwrap();
    let detected = detect_fast_path(&parsed);
    match detected {
        NdjsonFastPath::SelectStringPred {
            fields: path,
            pred: check,
        } => {
            assert_eq!(path, vec!["url"]);
            assert!(matches!(check, StringPred::StartsWith(prefix) if prefix == "/api"));
        }
        unexpected => panic!("expected SelectStringPred, got {:?}", unexpected),
    }
}
/// `select(.name | endswith(".json"))` is classified as `SelectStringPred`
/// carrying the `.json` suffix.
#[test]
fn fast_path_detects_select_endswith() {
    let parsed = crate::filter::parse(r#"select(.name | endswith(".json"))"#).unwrap();
    let detected = detect_fast_path(&parsed);
    match detected {
        NdjsonFastPath::SelectStringPred {
            fields: path,
            pred: check,
        } => {
            assert_eq!(path, vec!["name"]);
            assert!(matches!(check, StringPred::EndsWith(suffix) if suffix == ".json"));
        }
        unexpected => panic!("expected SelectStringPred, got {:?}", unexpected),
    }
}
/// `select(.desc | contains("alice"))` is classified as `SelectStringPred`
/// carrying the `alice` needle.
#[test]
fn fast_path_detects_select_contains() {
    let parsed = crate::filter::parse(r#"select(.desc | contains("alice"))"#).unwrap();
    let detected = detect_fast_path(&parsed);
    match detected {
        NdjsonFastPath::SelectStringPred {
            fields: path,
            pred: check,
        } => {
            assert_eq!(path, vec!["desc"]);
            assert!(matches!(check, StringPred::Contains(needle) if needle == "alice"));
        }
        unexpected => panic!("expected SelectStringPred, got {:?}", unexpected),
    }
}
/// A string-predicate select followed by `.code` is classified as
/// `SelectStringPredField` with separate predicate and output paths.
#[test]
fn fast_path_detects_select_test_extract() {
    let parsed = crate::filter::parse(r#"select(.msg | test("error")) | .code"#).unwrap();
    let detected = detect_fast_path(&parsed);
    match detected {
        NdjsonFastPath::SelectStringPredField {
            pred_fields: tested_path,
            pred: check,
            out_fields: emitted_path,
        } => {
            assert_eq!(tested_path, vec!["msg"]);
            assert!(matches!(check, StringPred::Test(_)));
            assert_eq!(emitted_path, vec!["code"]);
        }
        unexpected => panic!("expected SelectStringPredField, got {:?}", unexpected),
    }
}
/// The two-argument form `test("error"; "i")` must not be classified as
/// `SelectStringPred`.
#[test]
fn fast_path_select_test_not_two_args() {
    let parsed = crate::filter::parse(r#"select(.msg | test("error"; "i"))"#).unwrap();
    let detected = detect_fast_path(&parsed);
    let is_string_pred = matches!(detected, NdjsonFastPath::SelectStringPred { .. });
    assert!(!is_string_pred);
}
/// A nested path in front of the string predicate is captured segment by
/// segment (`actor`, then `login`).
#[test]
fn fast_path_select_test_nested_field() {
    let parsed = crate::filter::parse(r#"select(.actor.login | startswith("bot"))"#).unwrap();
    let detected = detect_fast_path(&parsed);
    match detected {
        NdjsonFastPath::SelectStringPred { fields: path, .. } => {
            assert_eq!(path, vec!["actor", "login"]);
        }
        unexpected => panic!("expected SelectStringPred, got {:?}", unexpected),
    }
}
/// `test("error")` keeps the whole line whose `msg` contains "error".
#[test]
fn fast_path_select_test_matching() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse(r#"select(.msg | test("error"))"#).unwrap();
    let input = b"{\"msg\":\"error: disk full\",\"id\":1}\n{\"msg\":\"ok\",\"id\":2}\n";
    let (bytes, produced, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    assert!(produced);
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"msg\":\"error: disk full\",\"id\":1}\n");
}
/// `startswith("/api")` keeps only the line whose `url` begins with `/api`.
#[test]
fn fast_path_select_startswith_matching() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse(r#"select(.url | startswith("/api"))"#).unwrap();
    let input = b"{\"url\":\"/api/users\",\"id\":1}\n{\"url\":\"/web/home\",\"id\":2}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"url\":\"/api/users\",\"id\":1}\n");
}
/// `endswith(".json")` keeps only the line whose `file` ends with `.json`.
#[test]
fn fast_path_select_endswith_matching() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse(r#"select(.file | endswith(".json"))"#).unwrap();
    let input = b"{\"file\":\"data.json\",\"id\":1}\n{\"file\":\"data.csv\",\"id\":2}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"file\":\"data.json\",\"id\":1}\n");
}
/// `contains("alice")` keeps only the line whose `desc` mentions "alice".
#[test]
fn fast_path_select_contains_matching() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse(r#"select(.desc | contains("alice"))"#).unwrap();
    let input = b"{\"desc\":\"hello alice\",\"id\":1}\n{\"desc\":\"hello bob\",\"id\":2}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"desc\":\"hello alice\",\"id\":1}\n");
}
/// `test("error")` emits nothing when no `msg` value matches.
#[test]
fn fast_path_select_test_no_match() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse(r#"select(.msg | test("error"))"#).unwrap();
    let input = b"{\"msg\":\"ok\"}\n{\"msg\":\"success\"}\n";
    let (bytes, produced, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    assert!(!produced);
    assert!(bytes.is_empty());
}
/// An anchored regex with a digit class selects the two `ERR-<digits>` codes
/// and rejects `OK-200`.
#[test]
fn fast_path_select_test_regex_pattern() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse(r#"select(.code | test("^ERR-\\d+$"))"#).unwrap();
    let input = b"{\"code\":\"ERR-001\"}\n{\"code\":\"OK-200\"}\n{\"code\":\"ERR-42\"}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"code\":\"ERR-001\"}\n{\"code\":\"ERR-42\"}\n");
}
/// With one numeric and one string `n`, only the string line matching
/// "hello" is expected in the output.
#[test]
fn fast_path_select_test_non_string_fallback() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse(r#"select(.n | test("hello"))"#).unwrap();
    let input = b"{\"n\":42}\n{\"n\":\"hello\"}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"n\":\"hello\"}\n");
}
/// A `msg` value containing a `\n` escape still matches `test("line1")`, and
/// the line is emitted with its escape intact.
#[test]
fn fast_path_select_test_escaped_string() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse(r#"select(.msg | test("line1"))"#).unwrap();
    let input = b"{\"msg\":\"line1\\nline2\",\"id\":1}\n{\"msg\":\"ok\",\"id\":2}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "{\"msg\":\"line1\\nline2\",\"id\":1}\n");
}
/// String-predicate select followed by `.code`: only the matching line's code
/// (500) is printed.
#[test]
fn fast_path_select_test_extract_field() {
    let env = crate::filter::Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = crate::filter::parse(r#"select(.msg | test("error")) | .code"#).unwrap();
    let input = b"{\"msg\":\"error: disk full\",\"code\":500}\n{\"msg\":\"ok\",\"code\":200}\n";
    let (bytes, _, _) = process_ndjson(input, &parsed, &cfg, &env).unwrap();
    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "500\n");
}
/// A `Test` predicate reports `Some(true)` for a quoted string the regex matches.
#[test]
fn string_pred_test_match() {
    let check = StringPred::Test(Regex::new("error").unwrap());
    let verdict = evaluate_string_predicate(b"\"error: disk full\"", &check);
    assert_eq!(verdict, Some(true));
}
/// A `Test` predicate reports `Some(false)` for a quoted string the regex
/// does not match.
#[test]
fn string_pred_test_no_match() {
    let check = StringPred::Test(Regex::new("error").unwrap());
    let verdict = evaluate_string_predicate(b"\"ok\"", &check);
    assert_eq!(verdict, Some(false));
}
/// `StartsWith("/api")` accepts `"/api/users"`.
#[test]
fn string_pred_startswith_match() {
    let check = StringPred::StartsWith("/api".to_string());
    let verdict = evaluate_string_predicate(b"\"/api/users\"", &check);
    assert_eq!(verdict, Some(true));
}
/// `StartsWith("/api")` rejects `"/web/home"`.
#[test]
fn string_pred_startswith_no_match() {
    let check = StringPred::StartsWith("/api".to_string());
    let verdict = evaluate_string_predicate(b"\"/web/home\"", &check);
    assert_eq!(verdict, Some(false));
}
/// `EndsWith(".json")` accepts `"data.json"`.
#[test]
fn string_pred_endswith_match() {
    let check = StringPred::EndsWith(".json".to_string());
    let verdict = evaluate_string_predicate(b"\"data.json\"", &check);
    assert_eq!(verdict, Some(true));
}
/// `EndsWith(".json")` rejects `"data.csv"`.
#[test]
fn string_pred_endswith_no_match() {
    let check = StringPred::EndsWith(".json".to_string());
    let verdict = evaluate_string_predicate(b"\"data.csv\"", &check);
    assert_eq!(verdict, Some(false));
}
/// `Contains("alice")` accepts `"hello alice!"`.
#[test]
fn string_pred_contains_match() {
    let check = StringPred::Contains("alice".to_string());
    let verdict = evaluate_string_predicate(b"\"hello alice!\"", &check);
    assert_eq!(verdict, Some(true));
}
/// `Contains("alice")` rejects `"hello bob"`.
#[test]
fn string_pred_contains_no_match() {
    let check = StringPred::Contains("alice".to_string());
    let verdict = evaluate_string_predicate(b"\"hello bob\"", &check);
    assert_eq!(verdict, Some(false));
}
/// Raw values that are not quoted strings (number, `null`, boolean) yield
/// `None` instead of a verdict.
#[test]
fn string_pred_non_string_returns_none() {
    let check = StringPred::Test(Regex::new("x").unwrap());

    let on_number = evaluate_string_predicate(b"42", &check);
    assert_eq!(on_number, None);

    let on_null = evaluate_string_predicate(b"null", &check);
    assert_eq!(on_null, None);

    let on_bool = evaluate_string_predicate(b"true", &check);
    assert_eq!(on_bool, None);
}
/// `Contains("line1")` still answers `Some(true)` when the raw string holds a
/// `\n` escape sequence.
#[test]
fn string_pred_escaped_string() {
    let check = StringPred::Contains("line1".to_string());
    let verdict = evaluate_string_predicate(b"\"line1\\nline2\"", &check);
    assert_eq!(verdict, Some(true));
}
/// An empty prefix is accepted by `StartsWith` for a non-empty string.
#[test]
fn string_pred_empty_string() {
    let check = StringPred::StartsWith("".to_string());
    let verdict = evaluate_string_predicate(b"\"hello\"", &check);
    assert_eq!(verdict, Some(true));
}
/// Two one-line JSON objects are detected as NDJSON.
#[test]
fn detect_reader_basic_ndjson() {
    let payload = b"{\"a\":1}\n{\"b\":2}\n";
    let mut reader = std::io::Cursor::new(&payload[..]);
    let is_ndjson = detect_ndjson_from_reader(&mut reader).unwrap();
    assert!(is_ndjson);
}
/// A single pretty-printed object spread over several lines is not NDJSON.
#[test]
fn detect_reader_not_ndjson() {
    let payload = b"{\n \"a\": 1\n}\n";
    let mut reader = std::io::Cursor::new(&payload[..]);
    let is_ndjson = detect_ndjson_from_reader(&mut reader).unwrap();
    assert!(!is_ndjson);
}
/// One object on one line is not reported as NDJSON.
#[test]
fn detect_reader_single_object() {
    let payload = b"{\"a\":1}\n";
    let mut reader = std::io::Cursor::new(&payload[..]);
    let is_ndjson = detect_ndjson_from_reader(&mut reader).unwrap();
    assert!(!is_ndjson);
}
/// Empty input is not reported as NDJSON.
#[test]
fn detect_reader_empty() {
    let payload = b"";
    let mut reader = std::io::Cursor::new(&payload[..]);
    let is_ndjson = detect_ndjson_from_reader(&mut reader).unwrap();
    assert!(!is_ndjson);
}
/// Detection still succeeds when the first line carries a 10000-character
/// string value and the second line is short.
#[test]
fn detect_reader_long_first_line() {
    let long_line = format!("{{\"data\":\"{}\"}}", "x".repeat(10000));
    let short_line = "{\"b\":2}";
    let payload = format!("{}\n{}\n", long_line, short_line);
    let mut reader = std::io::Cursor::new(payload.as_bytes());
    let is_ndjson = detect_ndjson_from_reader(&mut reader).unwrap();
    assert!(is_ndjson);
}
/// Detection still succeeds when both lines carry 50000-character string values.
#[test]
fn detect_reader_long_lines_both() {
    let first = format!("{{\"data\":\"{}\"}}", "a".repeat(50000));
    let second = format!("{{\"data\":\"{}\"}}", "b".repeat(50000));
    let payload = format!("{}\n{}\n", first, second);
    let mut reader = std::io::Cursor::new(payload.as_bytes());
    let is_ndjson = detect_ndjson_from_reader(&mut reader).unwrap();
    assert!(is_ndjson);
}
/// Lines holding JSON arrays (not objects) are also detected as NDJSON.
#[test]
fn detect_reader_arrays() {
    let payload = b"[1,2,3]\n[4,5,6]\n";
    let mut reader = std::io::Cursor::new(&payload[..]);
    let is_ndjson = detect_ndjson_from_reader(&mut reader).unwrap();
    assert!(is_ndjson);
}
/// With more input than buffer space, `read_fully` fills all five bytes.
#[test]
fn read_fully_exact() {
    let source = b"hello world";
    let mut reader = std::io::Cursor::new(&source[..]);
    let mut scratch = [0u8; 5];
    let filled = read_fully(&mut reader, &mut scratch).unwrap();
    assert_eq!(filled, 5);
    assert_eq!(&scratch, b"hello");
}
/// With less input than buffer space, `read_fully` returns the short count
/// and leaves the read bytes at the front of the buffer.
#[test]
fn read_fully_eof_before_full() {
    let source = b"hi";
    let mut reader = std::io::Cursor::new(&source[..]);
    let mut scratch = [0u8; 10];
    let filled = read_fully(&mut reader, &mut scratch).unwrap();
    assert_eq!(filled, 2);
    assert_eq!(&scratch[..2], b"hi");
}
/// Reading from an empty source reports zero bytes.
#[test]
fn read_fully_empty() {
    let source = b"";
    let mut reader = std::io::Cursor::new(&source[..]);
    let mut scratch = [0u8; 10];
    let filled = read_fully(&mut reader, &mut scratch).unwrap();
    assert_eq!(filled, 0);
}
/// Parses `expr` into a [`Filter`], panicking if the expression does not parse.
fn make_filter(expr: &str) -> Filter {
    let parsed = crate::filter::parse(expr);
    parsed.expect("failed to parse filter")
}
/// Runs `filter_expr` over `input` through `process_ndjson_streaming` in
/// compact mode with an empty environment.
///
/// Returns the bytes written and the boolean the streaming call reported.
/// Panics if the filter does not parse or the streaming call fails.
fn streaming_output(input: &[u8], filter_expr: &str) -> (Vec<u8>, bool) {
    let env = Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = make_filter(filter_expr);
    let mut sink = Vec::new();
    let mut reader = std::io::Cursor::new(input);
    let had_output = process_ndjson_streaming(&mut reader, &parsed, &cfg, &env, &mut sink)
        .expect("streaming failed");
    (sink, had_output)
}
/// Runs `filter_expr` over `input` through `process_ndjson` in compact mode
/// with an empty environment.
///
/// Returns the output bytes and the boolean the call reported; the third
/// element of the result tuple is discarded. Panics if the filter does not
/// parse or the call fails.
fn buffered_output(input: &[u8], filter_expr: &str) -> (Vec<u8>, bool) {
    let env = Env::empty();
    let cfg = OutputConfig {
        mode: crate::output::OutputMode::Compact,
        ..OutputConfig::default()
    };
    let parsed = make_filter(filter_expr);
    let (bytes, had_output, _) =
        process_ndjson(input, &parsed, &cfg, &env).expect("buffered failed");
    (bytes, had_output)
}
/// The identity filter echoes both input lines through the streaming path.
#[test]
fn streaming_basic_identity() {
    let payload = b"{\"a\":1}\n{\"b\":2}\n";
    let (written, produced) = streaming_output(payload, ".");
    assert!(produced);
    assert_eq!(written, b"{\"a\":1}\n{\"b\":2}\n");
}
/// `.a` prints the `a` value of each line through the streaming path.
#[test]
fn streaming_field_extraction() {
    let payload = b"{\"a\":1,\"b\":2}\n{\"a\":3,\"b\":4}\n";
    let (written, produced) = streaming_output(payload, ".a");
    assert!(produced);
    assert_eq!(written, b"1\n3\n");
}
/// `select(.x > 1) | .x` drops the first line and prints 2 and 3.
#[test]
fn streaming_select() {
    let payload = b"{\"x\":1}\n{\"x\":2}\n{\"x\":3}\n";
    let (written, produced) = streaming_output(payload, "select(.x > 1) | .x");
    assert!(produced);
    assert_eq!(written, b"2\n3\n");
}
/// Empty input produces no bytes and no output flag.
#[test]
fn streaming_empty_input() {
    let (written, produced) = streaming_output(b"", ".");
    assert!(!produced);
    assert!(written.is_empty());
}
/// A final line without a trailing newline is still processed, and the
/// output is newline-terminated.
#[test]
fn streaming_single_line_no_trailing_newline() {
    let payload = b"{\"a\":1}";
    let (written, produced) = streaming_output(payload, ".a");
    assert!(produced);
    assert_eq!(written, b"1\n");
}
/// For 100 generated lines and several filters, the streaming and buffered
/// helpers must agree on both the output bytes and the output flag.
#[test]
fn streaming_matches_buffered() {
    // Each generated line is followed by exactly one newline.
    let mut text = String::new();
    for i in 0..100 {
        text.push_str(&format!("{{\"i\":{},\"s\":\"val_{}\"}}", i, i));
        text.push('\n');
    }
    let payload = text.as_bytes();

    let expressions = [".", ".i", ".s", "select(.i > 50) | .s", "{i,s}"];
    for filter_expr in expressions.iter() {
        let (buffered_bytes, buffered_flag) = buffered_output(payload, filter_expr);
        let (streamed_bytes, streamed_flag) = streaming_output(payload, filter_expr);
        assert_eq!(
            streamed_bytes, buffered_bytes,
            "output mismatch for filter: {}",
            filter_expr
        );
        assert_eq!(
            streamed_flag, buffered_flag,
            "had_output mismatch for filter: {}",
            filter_expr
        );
    }
}
/// Blank lines between and after records must give the same bytes from the
/// streaming and buffered helpers.
#[test]
fn streaming_handles_empty_lines() {
    let payload = b"{\"a\":1}\n\n{\"b\":2}\n\n";
    let (buffered_bytes, _) = buffered_output(payload, ".");
    let (streamed_bytes, _) = streaming_output(payload, ".");
    assert_eq!(streamed_bytes, buffered_bytes);
}
}
/// Returns a fixed list of filter expressions as static strings.
///
/// NOTE(review): judging by the name, these are the filters used to exercise
/// every NDJSON fast path from tests elsewhere; confirm against callers.
#[doc(hidden)]
pub fn all_fast_path_test_filters() -> Vec<&'static str> {
    // Names every `NdjsonFastPath` variant with no wildcard arm, so adding a
    // variant makes this match non-exhaustive and stops the build here.
    // Nothing in this function calls it.
    #[deny(unreachable_patterns)]
    fn _variant_coverage_check(fp: &NdjsonFastPath) {
        match fp {
            NdjsonFastPath::None
            | NdjsonFastPath::FieldChain(_)
            | NdjsonFastPath::SelectEq { .. }
            | NdjsonFastPath::Length(_)
            | NdjsonFastPath::Keys { .. }
            | NdjsonFastPath::Type(_)
            | NdjsonFastPath::Has { .. }
            | NdjsonFastPath::SelectEqField { .. }
            | NdjsonFastPath::MultiFieldObj { .. }
            | NdjsonFastPath::MultiFieldArr { .. }
            | NdjsonFastPath::SelectEqObj { .. }
            | NdjsonFastPath::SelectEqArr { .. }
            | NdjsonFastPath::SelectCompound { .. }
            | NdjsonFastPath::SelectStringPred { .. }
            | NdjsonFastPath::SelectStringPredField { .. } => {}
        }
    }

    const FILTERS: &[&str] = &[
        // Plain field access.
        ".name",
        ".actor.login",
        // Single-comparison selects.
        "select(.type == \"PushEvent\")",
        "select(.count == 42)",
        "select(.active == true)",
        "select(.value == null)",
        "select(.type != \"PushEvent\")",
        "select(.count > 10)",
        "select(.count < 100)",
        "select(.count >= 50)",
        "select(.count <= 50)",
        "select(.name > \"m\")",
        // Builtins, bare and behind a field.
        "length",
        ".meta | length",
        "keys",
        ".meta | keys",
        "keys_unsorted",
        ".meta | keys_unsorted",
        "type",
        ".meta | type",
        "has(\"name\")",
        ".meta | has(\"x\")",
        // Select followed by a field extraction.
        "select(.type == \"PushEvent\") | .name",
        "select(.count > 10) | .name",
        // Object and array construction.
        "{name: .name, count: .count}",
        "{type: .type, login: .actor.login}",
        "[.name, .count]",
        "[.type, .actor.login]",
        "select(.type == \"PushEvent\") | {name: .name, count: .count}",
        "select(.type == \"PushEvent\") | [.name, .count]",
        // Compound boolean selects.
        "select(.type == \"PushEvent\" and .active == true)",
        "select(.type == \"PushEvent\" or .type == \"CreateEvent\")",
        "select(.count > 10 and .active == true)",
        "select(.type != \"PushEvent\" or .count < 100)",
        // String predicates.
        "select(.name | test(\"^A\"))",
        "select(.name | startswith(\"test\"))",
        "select(.name | endswith(\".com\"))",
        "select(.name | contains(\"oo\"))",
        "select(.name | contains(\"oo\")) | .count",
    ];
    FILTERS.to_vec()
}