use crate::error::{Error, Result};
use crate::metal::{Dispatch, MetalContext, MjHeader, MjParams, THREADGROUP_SIZE};
use crate::pool::Alloc;
use crate::stage::{Stage, Stage1Buffers, Stage2Buffers};
use super::stage1::{Stage1, Stage1Output};
// Stage-2 syntax error codes. A code travels in the low 32 bits of the packed
// error word (`(offset << 32) | code`). The unit tests pin the strict numeric
// ordering below and require the parent module's `ERR_UTF8` / `ERR_STRING` to
// sort before all of them; the test name calls this the tie-break contract
// (presumably between errors competing for the same report; confirm in the kernels).
/// Missing `:` after an object key; the unit tests expect it for `{"a"}` and `{"x", null}`.
pub const ERR_MISSING_COLON: u32 = 16;
/// Missing separator between values; the unit tests expect it for `[1 true]` and `null null`.
pub const ERR_MISSING_COMMA: u32 = 17;
/// Token in a position where it is not allowed; the unit tests expect it for `[,]` and a leading `]`.
pub const ERR_UNEXPECTED_TOKEN: u32 = 18;
/// Malformed bare literal; the unit tests expect it for `[tru]` and `false0`.
pub const ERR_INVALID_LITERAL: u32 = 19;
/// Unclosed container; the unit tests expect it for `[`, `[[` and `{`.
pub const ERR_UNBALANCED: u32 = 20;
/// Counterpart of the reference `SyntaxErrorKind::UnterminatedString`. The GPU tests in this
/// file never expect it directly: an unclosed quote is reported by stage 1 as `ERR_STRING`.
pub const ERR_UNTERMINATED_STRING: u32 = 21;
/// Returned (at offset 0) when the input has no words or no tokens, e.g. empty or whitespace-only input.
pub const ERR_EMPTY_INPUT: u32 = 22;
/// Host-side copy of everything stage 2 produced for one input.
///
/// When built through `Stage2Output::rejected`, only `stage1` and `error` are set and every
/// other field keeps its `Default` value (empty vectors, zero totals).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Stage2Output {
/// Snapshot of the stage-1 results this run was built on.
pub stage1: Stage1Output,
/// Tape offsets read back from the GPU `tape_ofs` buffer. The reference comparison in the
/// tests expects one entry per token: a running offset that starts at 1 and advances by
/// each token's footprint.
pub tape_ofs: Vec<u32>,
/// Skeleton records, column 1: the token index of each skeleton entry.
pub skeleton_token_index: Vec<u32>,
/// Skeleton records, column 2: the input byte position of each skeleton entry.
pub skeleton_pos: Vec<u32>,
/// Skeleton records, column 3: the structural byte itself (e.g. `{:[,],:}` in the worked example).
pub skeleton_byte: Vec<u8>,
/// Token indices of string tokens (the reference comparison uses the opening-quote tokens).
pub string_tokens: Vec<u32>,
/// Token indices of scalar tokens (the reference comparison uses the scalar-start tokens).
pub scalar_tokens: Vec<u32>,
/// `tape_word_total` copied from the GPU header.
pub tape_word_total: u64,
/// `stringbuf_total` copied from the GPU header.
pub stringbuf_total: u64,
/// Packed error word `(offset << 32) | code`, or `None` when the input was accepted.
pub error: Option<u64>,
}
impl Stage2Output {
/// Splits the packed error word into `(byte offset, error code)`.
///
/// Returns `None` when no error was recorded. The offset is the upper 32 bits of the
/// word and the code is its lower 32 bits.
#[must_use]
pub fn error_offset_code(&self) -> Option<(u64, u32)> {
    let packed = self.error?;
    // Truncation is intentional: the code occupies exactly the low 32 bits.
    Some((packed >> 32, packed as u32))
}
pub(crate) fn rejected(stage1: Stage1Output, packed_error: u64) -> Self {
Self {
stage1,
error: Some(packed_error),
..Self::default()
}
}
pub(crate) fn from_rejection(
bufs1: &Stage1Buffers,
rejection: &Stage2Rejection,
) -> Self {
let header = bufs1.read_header();
let stage1 = Stage1Output::snapshot(
bufs1,
&header,
rejection.stage1_error.then_some(rejection.packed),
);
Self::rejected(stage1, rejection.packed)
}
}
/// Stage-2 driver: owns the stage-1 driver plus the three stage-2 kernel handles.
#[derive(Debug)]
pub struct Stage2 {
/// Stage-1 driver; runs the first command buffer and provides the scatter stage.
stage1: Stage1,
/// Handle for the `token_validate_footprint` kernel.
validate: Stage,
/// Handle for the `spine3` kernel (dispatched as a single threadgroup).
spine3: Stage,
/// Handle for the `apply_tape_offsets` kernel.
apply: Stage,
}
impl Stage2 {
/// Creates the driver: a fresh stage-1 driver and one `Stage` handle per stage-2 kernel.
///
/// Being a `const fn`, it can also be used to initialise constants and statics.
#[must_use]
pub const fn new() -> Self {
Self {
stage1: Stage1::new(),
// Each literal is the name the stage is created with; `run_to_lists` reports it as the
// kernel name in its threadgroup-size assertion.
validate: Stage::new("token_validate_footprint"),
spine3: Stage::new("spine3"),
apply: Stage::new("apply_tape_offsets"),
}
}
/// Runs stages 1 and 2 on `input` using freshly created stage-1 buffers.
///
/// # Errors
/// Propagates any failure from creating the buffers or from `run_with_buffers`.
pub fn run(&self, ctx: &MetalContext, input: &[u8]) -> Result<Stage2Output> {
    let mut stage1_bufs = Stage1Buffers::new(ctx, input)?;
    let output = self.run_with_buffers(ctx, &mut stage1_bufs)?;
    Ok(output)
}
/// Runs stages 1 and 2 on caller-provided stage-1 buffers and copies the results to the host.
///
/// Buffers are allocated directly (`Alloc::Direct`). A rejected input yields an output that
/// carries only the stage-1 snapshot and the packed error.
///
/// # Errors
/// Propagates any failure from `run_to_lists`.
pub fn run_with_buffers(
    &self,
    ctx: &MetalContext,
    bufs1: &mut Stage1Buffers,
) -> Result<Stage2Output> {
    let accepted = match self.run_to_lists(ctx, Alloc::Direct, bufs1)? {
        Stage2Run::Accepted(run) => *run,
        Stage2Run::Rejected(rejection) => {
            return Ok(Stage2Output::from_rejection(bufs1, &rejection));
        }
    };
    // The GPU timing is not part of the host-side output.
    let Stage2Accepted { bufs2, header, .. } = accepted;
    let stage1 = Stage1Output::snapshot(bufs1, &header, None);
    Ok(Self::collect_outputs(stage1, &bufs2, &header))
}
/// Runs stage 1 and the stage-2 kernels, leaving the results in GPU buffers.
///
/// `alloc` is forwarded to every buffer allocation made here. The token buffers of `bufs1`
/// are allocated by this call once the token count is known.
///
/// Returns `Stage2Run::Rejected` when the input has no words or no tokens
/// (`ERR_EMPTY_INPUT`, offset 0) or when the header reports an error after either of the
/// first two command buffers; otherwise `Stage2Run::Accepted` with the stage-2 buffers, the
/// final header and the accumulated `gpu_seconds`.
///
/// # Errors
/// Propagates pipeline, allocation, batch and dispatch failures, and returns
/// `Error::CommandBuffer` when the header reports counts that are impossible for the input.
///
/// # Panics
/// Panics if a stage-2 pipeline supports fewer than `THREADGROUP_SIZE` threads per
/// threadgroup, or if the header's `token_total` does not fit `usize`.
pub(crate) fn run_to_lists(
&self,
ctx: &MetalContext,
alloc: Alloc<'_>,
bufs1: &mut Stage1Buffers,
) -> Result<Stage2Run> {
let input_len = bufs1.input_len();
// Nothing to dispatch: reject before touching the GPU.
if bufs1.words() == 0 {
return Ok(Stage2Run::rejected(u64::from(ERR_EMPTY_INPUT), false));
}
// Every stage-2 kernel must be able to run a full threadgroup of THREADGROUP_SIZE threads.
for stage in [&self.validate, &self.spine3, &self.apply] {
let max = stage.pipeline(ctx)?.max_total_threads_per_threadgroup();
assert!(
max >= THREADGROUP_SIZE,
"kernel `{}` supports only {max} threads/threadgroup (< {THREADGROUP_SIZE})",
stage.name()
);
}
// First command buffer: stage 1.
let t = super::timing::start();
let mut gpu_seconds = self.stage1.run_cb1(ctx, bufs1)?;
super::timing::record("cb1 (K1-K4)", t, gpu_seconds);
// First sync point: read the header to learn the stage-1 verdict and the token count.
let t = super::timing::start();
let header = bufs1.read_header();
if let Some((offset, code)) = header.first_error() {
let packed = (offset << 32) | u64::from(code);
// `true` marks the rejection as coming from stage 1.
return Ok(Stage2Run::rejected(packed, true));
}
let token_total = usize::try_from(header.token_total).expect("token_total fits usize");
// More tokens than input bytes is treated as a corrupt header.
if token_total > input_len {
return Err(Error::CommandBuffer {
message: format!(
"stage1 header reports {token_total} tokens for {input_len} input bytes"
),
});
}
if token_total == 0 {
return Ok(Stage2Run::rejected(u64::from(ERR_EMPTY_INPUT), false));
}
// The token count is known now, so the token buffers and stage-2 scratch can be sized.
bufs1.alloc_tokens_in(ctx, alloc, token_total)?;
let mut bufs2 = Stage2Buffers::new_in(ctx, alloc, token_total)?;
let tok_chunks = bufs2.chunks();
let word_chunks = bufs1.chunks();
// One parameter block per dispatch domain: input words, tokens, and token chunks.
let word_params = MjParams {
input_len: input_len as u64,
element_count: bufs1.words() as u64,
..Default::default()
};
let token_params = MjParams {
input_len: input_len as u64,
element_count: token_total as u64,
..Default::default()
};
let tok_chunk_params = MjParams {
input_len: input_len as u64,
element_count: tok_chunks as u64,
..Default::default()
};
super::timing::record("sync1: header + token/scratch alloc", t, 0.0);
// Second command buffer: scatter, validate and spine3 are encoded into one batch.
let t = super::timing::start();
{
let mut batch = ctx.batch()?;
let h_input = batch.bind_read(&bufs1.input);
let h_quote = batch.bind_read(&bufs1.bm_quote);
let h_tok = batch.bind_read(&bufs1.bm_tok);
let h_qcounts = batch.bind_read(&bufs1.chunk_quote_counts);
let h_tcounts = batch.bind_read(&bufs1.chunk_token_counts);
let h_pos = batch.bind_write(bufs1.tok_pos.as_mut().expect("allocated above"));
let h_kind = batch.bind_write(bufs1.tok_kind.as_mut().expect("allocated above"));
let h_header = batch.bind_write(&mut bufs1.header);
let h_counts = batch.bind_write(&mut bufs2.chunk_counts);
let h_sbytes = batch.bind_write(&mut bufs2.chunk_string_bytes);
let h_error = batch.bind_write(&mut bufs2.chunk_error);
// Scatter: one threadgroup per word chunk; writes the token position/kind arrays.
self.stage1.scatter_stage().encode(
&mut batch,
&[h_input, h_quote, h_tok, h_qcounts, h_tcounts, h_pos, h_kind],
Some(&word_params),
Dispatch::Threadgroups(word_chunks),
)?;
// Validate: one threadgroup per token chunk; fills the per-chunk counts, string bytes and errors.
self.validate.encode(
&mut batch,
&[h_input, h_pos, h_kind, h_counts, h_sbytes, h_error],
Some(&token_params),
Dispatch::Threadgroups(tok_chunks),
)?;
// Spine: a single threadgroup over the per-chunk arrays; it also receives the header.
self.spine3.encode(
&mut batch,
&[h_counts, h_sbytes, h_error, h_header],
Some(&tok_chunk_params),
Dispatch::Threadgroups(1),
)?;
let cb2_gpu = batch.commit_and_wait_timed()?;
gpu_seconds += cb2_gpu;
super::timing::record("cb2 (K5-K7)", t, cb2_gpu);
}
// Second sync point: re-read the header for the stage-2 verdict and the list totals.
let t = super::timing::start();
let header = bufs1.read_header();
super::timing::record("sync2: header read", t, 0.0);
if let Some((offset, code)) = header.first_error() {
let packed = (offset << 32) | u64::from(code);
// Hand the scratch buffers back to the pool when pooled allocation is in use.
if let Alloc::Pool(pool) = alloc {
bufs2.recycle(pool);
}
return Ok(Stage2Run::rejected(packed, false));
}
// NOTE(review): unlike the rejection branch above, an `Err` from here on returns without
// recycling `bufs2` into the pool; confirm that relying on its destructor is intended.
let totals = ListTotals::checked(&header, token_total)?;
// Third command buffer: size the output lists from the validated totals, then fill them.
let t = super::timing::start();
bufs2.alloc_lists_in(ctx, alloc, totals.skeleton, totals.strings, totals.scalars)?;
{
let mut batch = ctx.batch()?;
let h_input = batch.bind_read(&bufs1.input);
let h_pos = batch.bind_read(bufs1.tok_pos.as_ref().expect("allocated above"));
let h_kind = batch.bind_read(bufs1.tok_kind.as_ref().expect("allocated above"));
let h_counts = batch.bind_read(&bufs2.chunk_counts);
let h_tape_ofs = batch.bind_write(&mut bufs2.tape_ofs);
let h_skel_ti = batch.bind_write(bufs2.skel_token_index.as_mut().expect("just allocated"));
let h_skel_pos = batch.bind_write(bufs2.skel_pos.as_mut().expect("just allocated"));
let h_skel_byte = batch.bind_write(bufs2.skel_byte.as_mut().expect("just allocated"));
let h_strings = batch.bind_write(bufs2.string_tokens.as_mut().expect("just allocated"));
let h_scalars = batch.bind_write(bufs2.scalar_tokens.as_mut().expect("just allocated"));
// Apply: one threadgroup per token chunk; writes tape offsets and the three output lists.
self.apply.encode(
&mut batch,
&[
h_input, h_pos, h_kind, h_counts, h_tape_ofs, h_skel_ti, h_skel_pos,
h_skel_byte, h_strings, h_scalars,
],
Some(&token_params),
Dispatch::Threadgroups(tok_chunks),
)?;
let cb2b_gpu = batch.commit_and_wait_timed()?;
gpu_seconds += cb2b_gpu;
super::timing::record("cb2b (K6b) + list alloc", t, cb2b_gpu);
}
// `header` is the one read after the second command buffer; it carries the totals.
Ok(Stage2Run::Accepted(Box::new(Stage2Accepted {
bufs2,
header,
gpu_seconds,
})))
}
/// Copies the stage-2 GPU buffers of an accepted run into a host-side `Stage2Output`.
///
/// List buffers that were never allocated are returned as empty vectors. The two totals
/// come from `header`, and `error` is always `None`.
pub(crate) fn collect_outputs(
    stage1: Stage1Output,
    bufs2: &Stage2Buffers,
    header: &MjHeader,
) -> Stage2Output {
    let tape_ofs = bufs2.tape_ofs.as_slice::<u32>().to_vec();
    let skeleton_token_index = read_u32s(bufs2.skel_token_index.as_ref());
    let skeleton_pos = read_u32s(bufs2.skel_pos.as_ref());
    // The skeleton bytes are the only `u8` list, so they are copied without `read_u32s`.
    let skeleton_byte = match bufs2.skel_byte.as_ref() {
        Some(bytes) => bytes.as_slice::<u8>().to_vec(),
        None => Vec::new(),
    };
    let string_tokens = read_u32s(bufs2.string_tokens.as_ref());
    let scalar_tokens = read_u32s(bufs2.scalar_tokens.as_ref());
    Stage2Output {
        stage1,
        tape_ofs,
        skeleton_token_index,
        skeleton_pos,
        skeleton_byte,
        string_tokens,
        scalar_tokens,
        tape_word_total: header.tape_word_total,
        stringbuf_total: header.stringbuf_total,
        error: None,
    }
}
}
/// Outcome of `Stage2::run_to_lists`.
pub(crate) enum Stage2Run {
/// The input was rejected; carries the packed error and which stage reported it.
Rejected(Stage2Rejection),
/// The input passed; carries the GPU-side results (boxed).
Accepted(Box<Stage2Accepted>),
}
impl Stage2Run {
    /// Shorthand for a `Rejected` outcome with the given packed error word and origin flag.
    fn rejected(packed: u64, stage1_error: bool) -> Self {
        let rejection = Stage2Rejection {
            packed,
            stage1_error,
        };
        Self::Rejected(rejection)
    }
}
/// Why an input was rejected.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Stage2Rejection {
/// Packed error word: `(offset << 32) | code`.
pub(crate) packed: u64,
/// `true` when the error was reported by the header right after the stage-1 command buffer.
pub(crate) stage1_error: bool,
}
/// GPU-side results of an accepted run.
pub(crate) struct Stage2Accepted {
/// Stage-2 buffers, including the output lists filled by the last command buffer.
pub(crate) bufs2: Stage2Buffers,
/// Header as read after the second command buffer.
pub(crate) header: MjHeader,
/// Sum of the GPU times reported by the three command buffers.
pub(crate) gpu_seconds: f64,
}
impl Default for Stage2 {
fn default() -> Self {
Self::new()
}
}
/// Validated element counts used to size the stage-2 output lists.
struct ListTotals {
/// Number of skeleton records.
skeleton: usize,
/// Number of string-token records.
strings: usize,
/// Number of scalar-token records.
scalars: usize,
}
impl ListTotals {
    /// Validates the list totals reported by the GPU header against the token count.
    ///
    /// The skeleton and scalar totals may not exceed `token_total`, the string total may not
    /// exceed half of it, and the tape word total may not exceed twice `token_total`.
    ///
    /// # Errors
    /// Returns `Error::CommandBuffer` naming the first total that is out of range. A total
    /// that does not even fit `usize` is reported the same way instead of panicking.
    fn checked(header: &MjHeader, token_total: usize) -> Result<Self> {
        let corrupt = |what: &str, got: u64| Error::CommandBuffer {
            message: format!("stage2 header reports {got} {what} for {token_total} tokens"),
        };
        // Converts a header count to `usize` and bounds it in one step. A failed
        // conversion means the count is certainly above any `usize` limit.
        let bounded = |what: &str, got: u64, limit: usize| match usize::try_from(got) {
            Ok(count) if count <= limit => Ok(count),
            _ => Err(corrupt(what, got)),
        };
        let skeleton = bounded("skeleton records", header.skeleton_total, token_total)?;
        let strings = bounded("string records", header.string_total, token_total / 2)?;
        let scalars = bounded("scalar records", header.scalar_total, token_total)?;
        // Saturate so the limit itself cannot overflow for very large token counts.
        let tape_word_limit = (token_total as u64).saturating_mul(2);
        if header.tape_word_total > tape_word_limit {
            return Err(corrupt("tape words", header.tape_word_total));
        }
        Ok(Self {
            skeleton,
            strings,
            scalars,
        })
    }
}
/// Copies an optional GPU buffer to the host as `u32`s; an absent buffer yields an empty vector.
fn read_u32s(buffer: Option<&crate::metal::GpuBuffer>) -> Vec<u32> {
    match buffer {
        Some(gpu) => gpu.as_slice::<u32>().to_vec(),
        None => Vec::new(),
    }
}
/// Convenience wrapper: runs stages 1 and 2 on `input` with a throwaway [`Stage2`] driver.
///
/// # Errors
/// Propagates any failure from [`Stage2::run`].
pub fn run_stage2(ctx: &MetalContext, input: &[u8]) -> Result<Stage2Output> {
    let stage2 = Stage2::new();
    stage2.run(ctx, input)
}
#[cfg(test)]
mod tests {
use super::*;
/// Creates a Metal context for a test, or logs a SKIP line and returns `None` when no
/// device is usable.
///
/// # Panics
/// Panics instead of skipping when `METAL_JSON_REQUIRE_GPU` is set to `1`.
fn ctx_or_skip(test: &str) -> Option<MetalContext> {
    let err = match MetalContext::new() {
        Ok(ctx) => return Some(ctx),
        Err(err) => err,
    };
    let gpu_required =
        std::env::var_os("METAL_JSON_REQUIRE_GPU").is_some_and(|value| value == "1");
    if gpu_required {
        panic!("METAL_JSON_REQUIRE_GPU=1 but no usable Metal device: {err}");
    }
    eprintln!("SKIP {test}: no usable Metal device here ({err})");
    None
}
/// Pins the numeric ordering of the error codes. The assertions sit in an inline `const`
/// block, so a violated ordering already fails at compile time.
#[test]
fn error_code_order_is_the_tie_break_contract() {
const {
// Stage-2 codes are strictly increasing in declaration order.
assert!(ERR_MISSING_COLON < ERR_MISSING_COMMA);
assert!(ERR_MISSING_COMMA < ERR_UNEXPECTED_TOKEN);
assert!(ERR_UNEXPECTED_TOKEN < ERR_INVALID_LITERAL);
assert!(ERR_INVALID_LITERAL < ERR_UNBALANCED);
assert!(ERR_UNBALANCED < ERR_UNTERMINATED_STRING);
assert!(ERR_UNTERMINATED_STRING < ERR_EMPTY_INPUT);
// The parent module's stage-1 codes sort before every stage-2 code.
assert!(super::super::ERR_UTF8 < ERR_MISSING_COLON);
assert!(super::super::ERR_STRING < ERR_MISSING_COLON);
}
}
/// Pins every stage-2 output for one small document containing an object, an array,
/// two numbers and three strings (one of them with an escape sequence).
#[test]
fn worked_example_outputs_are_exact() {
let Some(ctx) = ctx_or_skip("worked_example_outputs_are_exact") else {
return;
};
// Raw byte string: the `\n` below is two literal bytes, not a newline.
let out = run_stage2(&ctx, br#"{"a":[1,2.5],"b":"x\n"}"#).unwrap();
assert_eq!(out.error, None);
// One tape offset per token (16 tokens, see the `token_total` assertion below).
assert_eq!(
out.tape_ofs,
vec![1, 2, 3, 3, 3, 4, 6, 6, 8, 9, 9, 10, 10, 10, 11, 11]
);
// The three skeleton columns are parallel: byte, token index, input position.
assert_eq!(out.skeleton_byte, b"{:[,],:}".to_vec());
assert_eq!(out.skeleton_token_index, vec![0, 3, 4, 6, 8, 9, 12, 15]);
assert_eq!(out.skeleton_pos, vec![0, 4, 5, 7, 11, 12, 16, 22]);
assert_eq!(out.string_tokens, vec![1, 10, 13]);
assert_eq!(out.scalar_tokens, vec![5, 7]);
assert_eq!(out.tape_word_total, 11);
assert_eq!(out.stringbuf_total, 20);
assert_eq!(out.stage1.token_total, 16);
assert_eq!(out.stage1.error, None);
}
/// A document may consist of a single root value: a number, a literal, or a string.
#[test]
fn root_scalars_and_strings() {
let Some(ctx) = ctx_or_skip("root_scalars_and_strings") else {
return;
};
let stage2 = Stage2::new();
// Root number: one scalar token and two tape words.
let out = stage2.run(&ctx, b"42").unwrap();
assert_eq!(out.error, None);
assert_eq!(out.tape_ofs, vec![1]);
assert_eq!(out.tape_word_total, 2); assert_eq!(out.stringbuf_total, 0);
assert!(out.skeleton_byte.is_empty());
assert!(out.string_tokens.is_empty());
assert_eq!(out.scalar_tokens, vec![0]);
// Root literal: still a scalar token, but only one tape word.
let out = stage2.run(&ctx, b"true").unwrap();
assert_eq!(out.error, None);
assert_eq!(out.tape_word_total, 1);
assert_eq!(out.scalar_tokens, vec![0]);
// Root string: two tokens, one tape word, and 6 string-buffer bytes for a 1-byte string.
let out = stage2.run(&ctx, b"\"x\"").unwrap();
assert_eq!(out.error, None);
assert_eq!(out.tape_ofs, vec![1, 2]); assert_eq!(out.tape_word_total, 1);
assert_eq!(out.stringbuf_total, 6); assert_eq!(out.string_tokens, vec![0]);
assert!(out.scalar_tokens.is_empty());
}
/// Inputs without any token are rejected with `ERR_EMPTY_INPUT` at offset 0 and produce
/// no stage-2 lists.
#[test]
fn empty_and_whitespace_only_inputs_are_empty_input() {
    let Some(ctx) = ctx_or_skip("empty_and_whitespace_only_inputs_are_empty_input") else {
        return;
    };
    let stage2 = Stage2::new();
    let blank_inputs: [&[u8]; 3] = [b"", b" \t\n\r", b" "];
    for input in blank_inputs {
        let out = stage2.run(&ctx, input).unwrap();
        let verdict = out.error_offset_code();
        assert_eq!(verdict, Some((0, ERR_EMPTY_INPUT)), "{input:?}");
        assert!(out.tape_ofs.is_empty(), "{input:?}");
        assert!(out.skeleton_byte.is_empty(), "{input:?}");
    }
}
/// Table-driven check of stage-2 ("Layer 1") rejections: each case is
/// `(input, expected byte offset, expected error code)`.
#[test]
fn layer1_rejections_report_reference_offsets_and_codes() {
let Some(ctx) = ctx_or_skip("layer1_rejections_report_reference_offsets_and_codes")
else {
return;
};
let stage2 = Stage2::new();
let cases: &[(&[u8], u64, u32)] = &[
(b"]", 0, ERR_UNEXPECTED_TOKEN),
(b"}", 0, ERR_UNEXPECTED_TOKEN),
(b",1", 0, ERR_UNEXPECTED_TOKEN),
(b"{a: 1}", 1, ERR_UNEXPECTED_TOKEN), (b"[,]", 1, ERR_UNEXPECTED_TOKEN),
(b"[1,,2]", 3, ERR_UNEXPECTED_TOKEN),
(br#"["",]"#, 4, ERR_UNEXPECTED_TOKEN), (br#"{"a":}"#, 5, ERR_UNEXPECTED_TOKEN),
(b"[1:2]", 2, ERR_UNEXPECTED_TOKEN),
(b"[1 true]", 3, ERR_MISSING_COMMA),
(b"[3[4]]", 2, ERR_MISSING_COMMA),
(b"[][]", 2, ERR_MISSING_COMMA),
(b"[1]x", 3, ERR_MISSING_COMMA),
(b"null null", 5, ERR_MISSING_COMMA),
(b"{}{}", 2, ERR_MISSING_COMMA),
(b"[1,", 3, ERR_UNEXPECTED_TOKEN), (b"{", 0, ERR_UNBALANCED),
(b"[", 0, ERR_UNBALANCED),
(b"[[", 1, ERR_UNBALANCED),
(br#"["": 1]"#, 3, ERR_UNEXPECTED_TOKEN),
(br#""a":1"#, 3, ERR_UNEXPECTED_TOKEN),
(br#"{"x", null}"#, 4, ERR_MISSING_COLON),
(br#"{"a"}"#, 4, ERR_MISSING_COLON),
(br#"{"a""#, 4, ERR_MISSING_COLON), (br#"["x", truth]"#, 6, ERR_INVALID_LITERAL),
(b"[tru]", 1, ERR_INVALID_LITERAL),
(b"false0", 0, ERR_INVALID_LITERAL),
(b"truee", 0, ERR_INVALID_LITERAL),
(b"[True]", 1, ERR_UNEXPECTED_TOKEN),
(b"[*]", 1, ERR_UNEXPECTED_TOKEN),
(b"\xEF\xBB\xBF{}", 0, ERR_UNEXPECTED_TOKEN), (br#"{"a" "b"}"#, 5, ERR_MISSING_COLON),
(b"1 *", 2, ERR_MISSING_COMMA),
(b"1 truth", 2, ERR_MISSING_COMMA),
(br#"{"a"{"#, 4, ERR_MISSING_COLON),
(b"1{", 1, ERR_MISSING_COMMA),
(b"{[", 1, ERR_UNEXPECTED_TOKEN),
];
for &(input, offset, code) in cases {
let out = stage2.run(&ctx, input).unwrap();
assert_eq!(
out.error_offset_code(),
Some((offset, code)),
"{:?}",
String::from_utf8_lossy(input)
);
// Rejection contract: no stage-2 lists are returned...
assert!(out.tape_ofs.is_empty(), "{input:?}: no tape_ofs");
assert!(out.skeleton_byte.is_empty(), "{input:?}: no skeleton");
assert!(out.string_tokens.is_empty(), "{input:?}: no string list");
// ...but the stage-1 token positions are still part of the snapshot.
assert!(
!out.stage1.tok_pos.is_empty(),
"{input:?}: stage-1 tokens kept on Layer-1 rejection"
);
}
}
/// Inputs that this stage must accept without an error and with a non-empty skeleton.
/// The test name and assertion messages attribute their problems to a later layer
/// (the messages mention CB3 as the consumer of the skeleton).
#[test]
fn layer2_problems_pass_layer1() {
let Some(ctx) = ctx_or_skip("layer2_problems_pass_layer1") else {
return;
};
let stage2 = Stage2::new();
for input in [
&b"1]"[..], b"{}}", b"{},{}", b"1,2", b"[1}", b"[1", br#"[1,"a":2]"#, br#"{"a":1,2}"#, br#"{"foo":1, "a"}"#, ] {
let out = stage2.run(&ctx, input).unwrap();
assert_eq!(
out.error,
None,
"{:?} must pass Layer 1",
String::from_utf8_lossy(input)
);
assert!(
!out.skeleton_byte.is_empty(),
"{:?}: skeleton produced for CB3",
String::from_utf8_lossy(input)
);
}
// Spot-check the exact skeleton for the smallest case: a single `]` at token 1, byte 1.
let out = stage2.run(&ctx, b"1]").unwrap();
assert_eq!(out.skeleton_byte, b"]".to_vec());
assert_eq!(out.skeleton_token_index, vec![1]);
assert_eq!(out.skeleton_pos, vec![1]);
}
/// Errors found by stage 1 surface unchanged through the stage-2 entry point.
#[test]
fn stage1_rejections_carry_forward() {
let Some(ctx) = ctx_or_skip("stage1_rejections_carry_forward") else {
return;
};
let stage2 = Stage2::new();
// Invalid UTF-8 byte at offset 2.
let out = stage2.run(&ctx, b"ab\x80").unwrap();
assert_eq!(out.error_offset_code(), Some((2, super::super::ERR_UTF8)));
// Unlike a stage-2 rejection, a stage-1 rejection returns no token positions.
assert!(out.stage1.tok_pos.is_empty(), "stage-1 rejection contract");
assert!(out.tape_ofs.is_empty());
// Unclosed string: reported at the end of the input (offset 4 == input length).
let out = stage2.run(&ctx, b"\"abc").unwrap();
assert_eq!(out.error_offset_code(), Some((4, super::super::ERR_STRING)));
assert!(out.tape_ofs.is_empty());
}
/// A flat array of 3000 zeros yields 6001 tokens; the running offsets and the output
/// lists must stay exact across the whole stream.
#[test]
fn multi_chunk_token_streams_scan_correctly() {
    let Some(ctx) = ctx_or_skip("multi_chunk_token_streams_scan_correctly") else {
        return;
    };
    let n = 3000usize;
    // Build "[0,0,...,0]" with `n` zeros: 2n + 1 bytes in total.
    let mut input = Vec::with_capacity(2 * n + 1);
    input.push(b'[');
    input.extend_from_slice(vec!["0"; n].join(",").as_bytes());
    input.push(b']');

    let out = run_stage2(&ctx, &input).unwrap();
    assert_eq!(out.error, None);

    let count = n as u64;
    assert_eq!(out.stage1.token_total, 1 + 2 * count);
    assert_eq!(out.tape_word_total, 2 + 2 * count);
    assert_eq!(out.stringbuf_total, 0);

    // Skeleton: '[', then n - 1 commas, then ']'.
    assert_eq!(out.skeleton_byte.len(), n + 1);
    assert_eq!(out.skeleton_byte.first(), Some(&b'['));
    assert_eq!(out.skeleton_byte.last(), Some(&b']'));
    assert!(out.skeleton_byte[1..n].iter().all(|&b| b == b','));

    // Scalar k is token 2k + 1 and starts at tape offset 2 + 2k.
    assert_eq!(out.scalar_tokens.len(), n);
    assert_eq!(out.tape_ofs[0], 1);
    for k in 0..n {
        assert_eq!(out.tape_ofs[2 * k + 1], 2 + 2 * k as u32, "scalar {k}");
    }
    assert_eq!(out.tape_ofs[2 * n], 2 + 2 * n as u32);
    let scalars_in_order = out
        .scalar_tokens
        .iter()
        .zip(0u32..)
        .all(|(&token, k)| token == 2 * k + 1);
    assert!(scalars_in_order);
}
#[cfg(feature = "cpu-reference")]
mod vs_reference {
use super::*;
use crate::reference::{
SkeletonRecord, TokenKind, stage1_classify, stage2_tokens, stage3_validate_local,
};
use crate::{Error as CrateError, SyntaxErrorKind};
/// Maps a reference-implementation syntax error kind to the GPU error code it must match.
///
/// # Panics
/// Panics for any kind outside the seven listed below.
fn code_for_kind(kind: SyntaxErrorKind) -> u32 {
match kind {
SyntaxErrorKind::MissingColon => ERR_MISSING_COLON,
SyntaxErrorKind::MissingComma => ERR_MISSING_COMMA,
SyntaxErrorKind::UnexpectedToken => ERR_UNEXPECTED_TOKEN,
SyntaxErrorKind::InvalidLiteral => ERR_INVALID_LITERAL,
SyntaxErrorKind::UnbalancedBrackets => ERR_UNBALANCED,
SyntaxErrorKind::UnterminatedString => ERR_UNTERMINATED_STRING,
SyntaxErrorKind::EmptyInput => ERR_EMPTY_INPUT,
// Any other kind is not expected from the reference's local validation.
other => panic!("reference stage 3 cannot produce {other:?}"),
}
}
/// Runs the GPU stage 2 on `input` and compares every output against the CPU reference.
///
/// `label` prefixes all assertion messages. The comparison follows the reference pipeline:
/// classification (UTF-8 verdict), quote parity, then local validation. On acceptance it
/// checks the tape offsets, skeleton records, string/scalar lists and both totals.
///
/// # Panics
/// Panics on the first mismatch, on a GPU failure, or on a reference error of an unexpected kind.
fn diff(stage2: &Stage2, ctx: &MetalContext, input: &[u8], label: &str) {
let got = stage2
.run(ctx, input)
.unwrap_or_else(|e| panic!("{label}: GPU stage 2 failed: {e}"));
// Step 1: reference classification. A UTF-8 error must match the GPU verdict exactly.
let bitmaps = match stage1_classify(input) {
Ok(bitmaps) => bitmaps,
Err(CrateError::Utf8 { offset }) => {
assert_eq!(
got.error_offset_code(),
Some((offset, super::super::super::ERR_UTF8)),
"{label}: UTF-8 verdict"
);
return;
}
Err(other) => panic!("{label}: unexpected reference error {other:?}"),
};
// Step 2: an odd number of real quotes means an unclosed string. The GPU must report
// `ERR_STRING` at the end of the input, and the reference must reject the input too.
let quote_total: u64 = bitmaps
.quote_real
.iter()
.map(|w| u64::from(w.count_ones()))
.sum();
if quote_total % 2 == 1 {
assert_eq!(
got.error_offset_code(),
Some((input.len() as u64, super::super::super::ERR_STRING)),
"{label}: odd-quote verdict"
);
let tokens = stage2_tokens(&bitmaps, input);
assert!(
stage3_validate_local(&tokens, input).is_err(),
"{label}: reference must also reject an odd-quote input"
);
return;
}
// Step 3: reference tokenisation and local validation.
let tokens = stage2_tokens(&bitmaps, input);
match stage3_validate_local(&tokens, input) {
Err(CrateError::Syntax { offset, kind }) => {
// Rejected by the reference: offset and code must agree and no lists are returned.
assert_eq!(
got.error_offset_code(),
Some((offset, code_for_kind(kind))),
"{label}: Layer-1 verdict for reference {kind:?}"
);
assert!(got.tape_ofs.is_empty(), "{label}: rejection contract");
assert!(got.skeleton_byte.is_empty(), "{label}: rejection contract");
}
Err(other) => panic!("{label}: unexpected reference error {other:?}"),
Ok(s3) => {
assert_eq!(got.error, None, "{label}: spurious GPU error");
// Expected tape offsets: a running sum of the footprints that starts at 1.
let mut want_tape_ofs = Vec::with_capacity(tokens.len());
let mut running = 1u32;
for fp in &s3.footprints {
want_tape_ofs.push(running);
running += fp;
}
assert_eq!(got.tape_ofs, want_tape_ofs, "{label}: tape_ofs");
assert_eq!(
got.tape_word_total,
u64::from(running - 1),
"{label}: tape word total"
);
// Re-assemble the three GPU skeleton columns into records for comparison.
let want: Vec<SkeletonRecord> = s3.skeleton;
let got_records: Vec<SkeletonRecord> = got
.skeleton_token_index
.iter()
.zip(&got.skeleton_pos)
.zip(&got.skeleton_byte)
.map(|((&token_index, &pos), &byte)| SkeletonRecord {
token_index,
pos,
byte,
})
.collect();
assert_eq!(got_records, want, "{label}: skeleton records");
// Expected lists: indices of opening-quote tokens and of scalar-start tokens.
let want_strings: Vec<u32> = tokens
.iter()
.enumerate()
.filter(|(_, t)| t.kind == TokenKind::QuoteOpen)
.map(|(i, _)| u32::try_from(i).unwrap())
.collect();
let want_scalars: Vec<u32> = tokens
.iter()
.enumerate()
.filter(|(_, t)| t.kind == TokenKind::ScalarStart)
.map(|(i, _)| u32::try_from(i).unwrap())
.collect();
assert_eq!(got.string_tokens, want_strings, "{label}: string list");
assert_eq!(got.scalar_tokens, want_scalars, "{label}: scalar list");
// Expected string-buffer size: per string, the raw bytes between its opening-quote
// token and the following token, plus 5.
let want_stringbuf: u64 = want_strings
.iter()
.map(|&i| {
let open = tokens[i as usize].pos;
let close = tokens[i as usize + 1].pos;
u64::from(close - open - 1) + 5
})
.sum();
assert_eq!(
got.stringbuf_total, want_stringbuf,
"{label}: stringbuf total"
);
}
}
}
/// Diffs the GPU output against the CPU reference for every `*.json` fixture in `corpus/`,
/// in sorted path order.
#[test]
fn corpus_files_match_reference_stage3() {
    let Some(ctx) = ctx_or_skip("corpus_files_match_reference_stage3") else {
        return;
    };
    let stage2 = Stage2::new();
    let corpus = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("corpus");
    let mut paths = Vec::new();
    for entry in std::fs::read_dir(&corpus).expect("corpus/ is checked in") {
        let path = entry.expect("readable corpus entry").path();
        if path.extension().is_some_and(|ext| ext == "json") {
            paths.push(path);
        }
    }
    paths.sort();
    assert!(!paths.is_empty(), "corpus/ must contain fixtures");
    for path in &paths {
        let name = path.file_name().unwrap().to_string_lossy().into_owned();
        let bytes = std::fs::read(path).expect("readable corpus fixture");
        diff(&stage2, &ctx, &bytes, &name);
    }
}
/// Diffs the GPU output against the CPU reference for every `*.json` file of the fetched
/// JSONTestSuite. The test is skipped with a message when the suite directory is absent.
#[test]
fn jsontestsuite_files_match_reference_stage3() {
    let Some(ctx) = ctx_or_skip("jsontestsuite_files_match_reference_stage3") else {
        return;
    };
    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
    let dir = manifest_dir.join("data/JSONTestSuite/test_parsing");
    if !dir.is_dir() {
        eprintln!(
            "SKIP jsontestsuite_files_match_reference_stage3: {} not fetched \
             (scripts/fetch_jsontestsuite.sh)",
            dir.display()
        );
        return;
    }
    let stage2 = Stage2::new();
    let mut paths = Vec::new();
    for entry in std::fs::read_dir(&dir).expect("readable test_parsing dir") {
        let path = entry.expect("readable entry").path();
        if path.extension().is_some_and(|ext| ext == "json") {
            paths.push(path);
        }
    }
    paths.sort();
    // Guards against a partially fetched suite.
    assert!(paths.len() >= 300, "the fetched suite has 318 files");
    for path in &paths {
        let name = path.file_name().unwrap().to_string_lossy().into_owned();
        let bytes = std::fs::read(path).expect("readable suite file");
        diff(&stage2, &ctx, &bytes, &name);
    }
}
/// Diffs the GPU output against the CPU reference for a hand-written list of accepted and
/// rejected inputs, plus three generated long inputs.
#[test]
fn structural_fixtures_match_reference_stage3() {
let Some(ctx) = ctx_or_skip("structural_fixtures_match_reference_stage3") else {
return;
};
let stage2 = Stage2::new();
// The first element is sliced (`[..]`) so the array's element type is `&[u8]`.
let mut cases: Vec<Vec<u8>> = [
&br#"{"a":{}}"#[..],
br#"[[],[[]]]"#,
br#"{"k":[1,{"n":null},"s"],"e":{}}"#,
br#"{"":0}"#,
b"42",
b"true",
br#""root string""#,
b"",
b" \t\n\r",
b"]",
b"}",
b",1",
b"{a: 1}",
b"{1:1}",
br#"{[: "x"}"#,
br#"{:"b"}"#,
b"{,}",
b"[,1]",
b"[,]",
b"[}",
b"[1,,2]",
br#"{"x"::"b"}"#,
br#"{"a":}"#,
br#"["",]"#,
br#"{"id":0,}"#,
b"[1:2]",
b"[1 true]",
b"[3[4]]",
br#"["a" "b"]"#,
br#"{"a" "b"}"#,
br#"{"a" b}"#,
b"[][]",
br#"{"a": true} "x""#,
b"[1]x",
b"null null",
b"[1,",
br#"{"a":"#,
b"{",
b"[",
b"[[",
b"\"abc",
br#"]""#, br#"{"a"#,
br#"["a", "b"#,
br#"["": 1]"#,
br#"{"a":"b":"c"}"#,
br#""a":1"#,
br#"{"x", null}"#,
br#"{"a"}"#,
br#"{"a""#,
br#"{"foo":1, "a"}"#,
br#"["x", truth]"#,
b"[tru]",
b"false0",
b"[True]",
br#"{'a':0}"#,
b"[*]",
b"\xEF\xBB\xBF{}",
b"1]",
b"{}}",
b"{},{}",
b"1,2",
b"{}{}",
b"[1}",
b"[1",
br#"[1,"a":2]"#,
br#"{"a":1,2}"#,
b"x",
b"-0.0",
b"{ true:12}",
]
.iter()
.map(|d| d.to_vec())
.collect();
// Generated case 1: a long array of 900 short strings.
let mut big = b"[".to_vec();
for i in 0..900 {
if i > 0 {
big.push(b',');
}
big.extend_from_slice(format!("\"s{i}\"").as_bytes());
}
big.push(b']'); cases.push(big.clone());
// Generated case 2: the same array with the last closing quote overwritten by `x`.
let mut bad = big.clone();
let len = bad.len();
bad[len - 2] = b'x'; cases.push(bad);
// Generated case 3: 800 valid literals followed by a truncated one near the end.
let mut late_literal = b"[".to_vec();
for _ in 0..800 {
late_literal.extend_from_slice(b"true,");
}
late_literal.extend_from_slice(b"tru]"); cases.push(late_literal);
for input in &cases {
// The label is the first 48 bytes of the input, lossily decoded.
diff(
&stage2,
&ctx,
input,
&format!("{:?}", String::from_utf8_lossy(&input[..input.len().min(48)])),
);
}
}
}
}