use std::path::Path;
use std::rc::Rc;
use std::sync::{Arc, OnceLock};
use crate::document::Document;
use crate::error::Result;
#[allow(unused_imports)]
use crate::error::Error;
use crate::gpu::pipeline::{GpuInput, GpuParse, GpuPipeline, decode_packed_error};
use crate::input::AlignedInput;
use crate::metal::{GpuBuffer, MetalContext};
use crate::pool::ScratchPool;
use crate::stage::{MAX_INPUT_BYTES, WORD_BYTES};
use crate::tape::{StringBuffer, TapeBuffer};
/// Value that `ParserOptions::default()` assigns to `ParserOptions::max_depth`.
pub const DEFAULT_MAX_DEPTH: u32 = 1024;
/// Selects which implementation a `Parser` runs on.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate needs a wildcard arm
/// when matching on it. `Backend::default()` picks a variant from a one-time Metal
/// device probe.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Backend {
/// Metal GPU pipeline; building a parser with this backend creates a `MetalContext`.
Gpu,
/// CPU path that delegates to `crate::reference::parse`. Only compiled when the
/// `cpu-reference` feature is enabled.
#[cfg(feature = "cpu-reference")]
CpuReference,
}
/// Reports whether a `MetalContext` can be created in this process.
///
/// The probe runs at most once; every later call returns the cached answer.
fn metal_device_available() -> bool {
    static DEVICE_PROBE: OnceLock<bool> = OnceLock::new();
    let probe_once = || MetalContext::new().is_ok();
    let available: &bool = DEVICE_PROBE.get_or_init(probe_once);
    *available
}
/// Pure policy behind `Backend::default()`.
///
/// With a usable GPU the answer is always `Backend::Gpu`. Without one, the CPU
/// reference backend is chosen when the `cpu-reference` feature is compiled in;
/// otherwise `Backend::Gpu` is still returned because it is the only variant.
fn resolve_default_backend(gpu_available: bool) -> Backend {
    // What to hand out when no GPU is usable depends on the compiled feature set.
    #[cfg(feature = "cpu-reference")]
    let without_gpu = Backend::CpuReference;
    #[cfg(not(feature = "cpu-reference"))]
    let without_gpu = Backend::Gpu;

    if gpu_available {
        Backend::Gpu
    } else {
        without_gpu
    }
}
impl Default for Backend {
    /// Chooses the backend from the cached Metal device probe.
    fn default() -> Self {
        let gpu_available = metal_device_available();
        resolve_default_backend(gpu_available)
    }
}
/// Configuration consumed by `Parser::with_options`.
///
/// The struct is `#[non_exhaustive]`; outside this crate, start from
/// `ParserOptions::default()` and override fields.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct ParserOptions {
/// Depth limit forwarded to the selected backend; defaults to `DEFAULT_MAX_DEPTH`.
pub max_depth: u32,
/// Backend the parser runs on; defaults to `Backend::default()`.
pub backend: Backend,
}
impl Default for ParserOptions {
    /// `DEFAULT_MAX_DEPTH` combined with whatever `Backend::default()` selects.
    fn default() -> Self {
        let backend = Backend::default();
        Self {
            max_depth: DEFAULT_MAX_DEPTH,
            backend,
        }
    }
}
/// Everything the GPU backend needs, created once in `Parser::with_options`.
#[derive(Debug)]
struct GpuState {
/// Metal context passed to every pipeline run and buffer creation.
ctx: MetalContext,
/// Pipeline object whose `run_pooled` performs the parse.
pipeline: GpuPipeline,
/// Scratch pool; an `Arc` because documents produced by a parse keep a clone of it
/// through their tape and string buffers.
pool: Arc<ScratchPool>,
}
/// JSON parser front-end that dispatches to the backend chosen in its options.
///
/// Cloning is cheap: clones share the same GPU state through an `Rc`. Because of
/// that `Rc`, a `Parser` is neither `Send` nor `Sync`.
#[derive(Debug, Clone)]
pub struct Parser {
/// Options the parser was constructed with.
opts: ParserOptions,
/// `Some` when the backend is `Backend::Gpu`, `None` for the CPU reference backend.
gpu: Option<Rc<GpuState>>,
}
impl Parser {
pub fn new() -> Result<Self> {
Self::with_options(ParserOptions::default())
}
pub fn with_options(opts: ParserOptions) -> Result<Self> {
let gpu = match opts.backend {
Backend::Gpu => Some(Rc::new(GpuState {
ctx: MetalContext::new()?,
pipeline: GpuPipeline::new(),
pool: Arc::new(ScratchPool::new()),
})),
#[cfg(feature = "cpu-reference")]
Backend::CpuReference => None,
};
Ok(Self { opts, gpu })
}
#[must_use]
pub fn options(&self) -> &ParserOptions {
&self.opts
}
pub fn parse(&self, json: &[u8]) -> Result<Document> {
match self.opts.backend {
Backend::Gpu => {
let gpu = self.gpu.as_ref().expect("Gpu backend constructed its state");
let parse = gpu.pipeline.run_pooled(
&gpu.ctx,
&gpu.pool,
GpuInput::Bytes(json),
self.opts.max_depth,
)?;
self.finish_gpu(parse)
}
#[cfg(feature = "cpu-reference")]
Backend::CpuReference => {
let (tape, strings) = crate::reference::parse(json, &self.opts)?;
Ok(Document::from_parts(tape, strings))
}
}
}
#[cfg(feature = "serde")]
pub fn parse_deserialize<T>(&self, json: &[u8]) -> Result<T>
where
T: ::serde::de::DeserializeOwned,
{
let doc = self.parse(json)?;
Ok(crate::serde::from_document(&doc)?)
}
pub fn parse_aligned(&self, input: &AlignedInput) -> Result<Document> {
match self.opts.backend {
Backend::Gpu => {
let gpu = self.gpu.as_ref().expect("Gpu backend constructed its state");
let buffer = unsafe {
GpuBuffer::from_page_aligned(&gpu.ctx, input.base_ptr(), input.len())?
};
let parse = gpu.pipeline.run_pooled(
&gpu.ctx,
&gpu.pool,
GpuInput::External {
buffer,
len: input.len(),
},
self.opts.max_depth,
)?;
self.finish_gpu(parse)
}
#[cfg(feature = "cpu-reference")]
Backend::CpuReference => self.parse(input),
}
}
pub fn parse_file(&self, path: impl AsRef<Path>) -> Result<Document> {
match self.opts.backend {
Backend::Gpu => self.parse_file_gpu_copy(path.as_ref()),
#[cfg(feature = "cpu-reference")]
Backend::CpuReference => {
let bytes = std::fs::read(path)?;
self.parse(&bytes)
}
}
}
pub unsafe fn parse_file_mmap(&self, path: impl AsRef<Path>) -> Result<Document> {
match self.opts.backend {
Backend::Gpu => self.parse_file_gpu_mmap(path.as_ref()),
#[cfg(feature = "cpu-reference")]
Backend::CpuReference => {
let bytes = std::fs::read(path)?;
self.parse(&bytes)
}
}
}
fn parse_file_gpu_copy(&self, path: &Path) -> Result<Document> {
use std::io::Read;
let gpu = self.gpu.as_ref().expect("Gpu backend constructed its state");
let mut file = std::fs::File::open(path)?;
let file_len = file.metadata()?.len();
if file_len > MAX_INPUT_BYTES {
return Err(Error::InputTooLarge {
len: file_len,
max: MAX_INPUT_BYTES,
});
}
let len = usize::try_from(file_len).expect("checked against MAX_INPUT_BYTES");
if len == 0 {
return self.parse(&[]);
}
let padded_len = len.next_multiple_of(WORD_BYTES);
let mut buffer = gpu.pool.checkout(&gpu.ctx, padded_len)?;
let contents = buffer.contents_mut();
file.read_exact(&mut contents[..len])?;
contents[len..].fill(b' ');
let parse = gpu.pipeline.run_pooled(
&gpu.ctx,
&gpu.pool,
GpuInput::Pooled { buffer, len },
self.opts.max_depth,
)?;
self.finish_gpu(parse)
}
fn parse_file_gpu_mmap(&self, path: &Path) -> Result<Document> {
let gpu = self.gpu.as_ref().expect("Gpu backend constructed its state");
let file = std::fs::File::open(path)?;
let file_len = file.metadata()?.len();
if file_len > MAX_INPUT_BYTES {
return Err(Error::InputTooLarge {
len: file_len,
max: MAX_INPUT_BYTES,
});
}
let len = usize::try_from(file_len).expect("checked against MAX_INPUT_BYTES");
if len == 0 {
return self.parse(&[]);
}
let padded_len = len.next_multiple_of(WORD_BYTES);
let mut map = unsafe { memmap2::MmapOptions::new().len(padded_len).map_copy(&file) }?;
map[len..padded_len].fill(b' ');
let ptr = core::ptr::NonNull::new(map.as_mut_ptr()).expect("mmap never returns null");
let buffer = unsafe { GpuBuffer::from_page_aligned(&gpu.ctx, ptr, len)? };
let parse = gpu.pipeline.run_pooled(
&gpu.ctx,
&gpu.pool,
GpuInput::External { buffer, len },
self.opts.max_depth,
)?;
drop(map);
self.finish_gpu(parse)
}
fn finish_gpu(&self, parse: GpuParse) -> Result<Document> {
let gpu = self.gpu.as_ref().expect("Gpu backend constructed its state");
match parse {
GpuParse::Rejected(packed) => Err(decode_packed_error(packed, self.opts.max_depth)),
GpuParse::Accepted(out) => {
let tape = TapeBuffer::from_gpu(out.tape, Arc::clone(&gpu.pool));
let strings = match out.stringbuf {
Some(buf) => StringBuffer::from_gpu(buf, Arc::clone(&gpu.pool)),
None => StringBuffer::new(),
};
Ok(Document::from_parts(tape, strings))
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::error::SyntaxErrorKind;
/// Returns a default parser when it runs on the GPU backend; otherwise logs a SKIP
/// line for `test` and returns `None`.
///
/// With `METAL_JSON_REQUIRE_GPU=1` in the environment a missing GPU is a hard
/// failure instead of a skip.
fn gpu_parser_or_skip(test: &str) -> Option<Parser> {
    // Evaluated lazily: the environment is only consulted when no GPU parser exists.
    let gpu_is_mandatory =
        || std::env::var_os("METAL_JSON_REQUIRE_GPU").is_some_and(|flag| flag == "1");
    match Parser::new() {
        Ok(parser) => {
            if parser.options().backend == Backend::Gpu {
                return Some(parser);
            }
            if gpu_is_mandatory() {
                panic!("METAL_JSON_REQUIRE_GPU=1 but the default backend is not Gpu");
            }
            eprintln!("SKIP {test}: no usable Metal device here (CPU fallback default)");
            None
        }
        Err(err) => {
            if gpu_is_mandatory() {
                panic!("METAL_JSON_REQUIRE_GPU=1 but no usable Metal device: {err}");
            }
            eprintln!("SKIP {test}: no usable Metal device here ({err})");
            None
        }
    }
}
/// `ParserOptions::default()` uses depth 1024 and the default backend.
#[test]
fn default_options() {
    let ParserOptions {
        max_depth, backend, ..
    } = ParserOptions::default();
    assert_eq!(max_depth, 1024);
    assert_eq!(backend, Backend::default());
}
/// Checks the pure backend policy, then that `Backend::default()` and `Parser::new()`
/// agree with it on this machine.
#[test]
fn default_backend_policy() {
    // Pure policy: independent of the hardware the test runs on.
    assert_eq!(resolve_default_backend(true), Backend::Gpu);
    #[cfg(feature = "cpu-reference")]
    assert_eq!(resolve_default_backend(false), Backend::CpuReference);
    #[cfg(not(feature = "cpu-reference"))]
    assert_eq!(resolve_default_backend(false), Backend::Gpu);

    if !metal_device_available() {
        let gpu_required =
            std::env::var_os("METAL_JSON_REQUIRE_GPU").is_some_and(|flag| flag == "1");
        if gpu_required {
            panic!("METAL_JSON_REQUIRE_GPU=1 but the device probe failed");
        }
        assert_eq!(Backend::default(), resolve_default_backend(false));
        #[cfg(feature = "cpu-reference")]
        {
            let fallback =
                Parser::new().expect("CpuReference default construction is infallible");
            assert_eq!(fallback.options().backend, Backend::CpuReference);
            let doc = fallback.parse(b"[1,2]").expect("fallback backend parses");
            assert_eq!(doc.root().len(), Some(2));
        }
        return;
    }

    // A device exists: the default must be the GPU and must actually parse.
    assert_eq!(Backend::default(), Backend::Gpu);
    let gpu_parser = Parser::new().expect("probe says a device exists");
    assert_eq!(gpu_parser.options().backend, Backend::Gpu);
    let doc = gpu_parser.parse(b"[1,2]").expect("default backend parses");
    assert_eq!(doc.root().len(), Some(2));
}
/// An explicitly requested GPU backend either constructs (device present) or fails
/// (device absent); it is never silently replaced by another backend.
#[test]
fn explicit_gpu_backend_is_never_second_guessed() {
    let mut opts = ParserOptions::default();
    opts.backend = Backend::Gpu;
    let built = Parser::with_options(opts);
    match built {
        Err(err) => {
            assert!(
                !metal_device_available(),
                "explicit Gpu failed despite a usable device: {err}"
            );
        }
        Ok(parser) => {
            assert!(
                metal_device_available(),
                "explicit Gpu constructed without a device?"
            );
            assert_eq!(parser.options().backend, Backend::Gpu);
        }
    }
}
/// On a GPU machine, `Parser::new()` carries the default depth and the GPU backend.
#[test]
fn new_uses_default_options() {
    if let Some(parser) = gpu_parser_or_skip("new_uses_default_options") {
        let opts = parser.options();
        assert_eq!(opts.max_depth, DEFAULT_MAX_DEPTH);
        assert_eq!(opts.backend, Backend::Gpu);
    }
}
/// Options handed to `with_options` are readable back through `options()`.
#[test]
fn with_options_stores_the_options() {
    // Used purely as a device probe; the probe parser is dropped right away.
    let device_present = gpu_parser_or_skip("with_options_stores_the_options").is_some();
    if !device_present {
        return;
    }
    let mut opts = ParserOptions::default();
    opts.max_depth = 32;
    let parser = Parser::with_options(opts).expect("device exists: probe succeeded above");
    assert_eq!(parser.options().max_depth, 32);
}
/// Parses a small object on the GPU backend and checks both the value API and the
/// exact tape words.
#[test]
fn default_parser_parses_the_worked_example() {
let Some(parser) = gpu_parser_or_skip("default_parser_parses_the_worked_example") else {
return;
};
let doc = parser
.parse(br#"{"a":[1,2.5],"b":"x\n"}"#)
.expect("default parser must parse");
let root = doc.root();
assert_eq!(root.len(), Some(2));
let a = root.get("a").unwrap();
assert_eq!(a.at(0).unwrap().as_i64(), Some(1));
assert_eq!(a.at(1).unwrap().as_f64(), Some(2.5));
assert_eq!(root.get("b").unwrap().as_str(), Some("x\n"));
// Golden tape for the document above. NOTE(review): the top byte of each tagged
// word looks like an ASCII type tag ('r', '{', '"', '[', 'l', 'd', ']', '}') with a
// payload in the low bits; the untagged words are the raw integer 1 and the IEEE-754
// bits of 2.5 — confirm against the tape format documentation.
let expected: [u64; 13] = [
0x7200_0000_0000_000C,
0x7B00_0002_0000_000C,
0x2200_0000_0000_0000,
0x5B00_0002_0000_0009,
0x6C00_0000_0000_0000,
1,
0x6400_0000_0000_0000,
0x4004_0000_0000_0000,
0x5D00_0000_0000_0003,
0x2200_0000_0000_0006,
0x2200_0000_0000_000C,
0x7D00_0000_0000_0001,
0x7200_0000_0000_0000,
];
assert_eq!(doc.tape(), expected);
}
/// Documents whose root is a scalar (integer, bool, null, string, float) parse on
/// the GPU backend.
#[test]
fn root_scalar_documents_parse_on_the_gpu() {
    let Some(parser) = gpu_parser_or_skip("root_scalar_documents_parse_on_the_gpu") else {
        return;
    };

    let int_doc = parser.parse(b"42").unwrap();
    assert_eq!(int_doc.root().as_i64(), Some(42));
    assert_eq!(int_doc.tape().len(), 4);

    let bool_doc = parser.parse(b"true").unwrap();
    assert_eq!(bool_doc.root().as_bool(), Some(true));
    assert_eq!(bool_doc.tape().len(), 3);

    let null_doc = parser.parse(b" null \n").unwrap();
    assert!(null_doc.root().is_null());

    let string_doc = parser.parse(br#""xA""#).unwrap();
    assert_eq!(string_doc.root().as_str(), Some("xA"));

    // Compare bit patterns so that -0.0 is distinguished from +0.0.
    let float_doc = parser.parse(b"-0.0").unwrap();
    let negative_zero_bits = (-0.0f64).to_bits();
    assert_eq!(
        float_doc.root().as_f64().map(f64::to_bits),
        Some(negative_zero_bits)
    );
}
/// Empty and whitespace-only inputs are rejected with `EmptyInput` at offset 0.
#[test]
fn empty_and_whitespace_only_inputs_error_as_empty_input() {
    let Some(parser) =
        gpu_parser_or_skip("empty_and_whitespace_only_inputs_error_as_empty_input")
    else {
        return;
    };
    for input in [&b""[..], b" ", b" \t\n\r"] {
        let outcome = parser.parse(input);
        let is_empty_input_error = matches!(
            outcome,
            Err(Error::Syntax {
                offset: 0,
                kind: SyntaxErrorKind::EmptyInput,
            })
        );
        if !is_empty_input_error {
            panic!("{input:?}: expected EmptyInput at 0, got {outcome:?}");
        }
    }
}
/// One malformed input per error category; each must surface as the expected public
/// `Error` variant with the expected byte offset.
#[test]
fn gpu_errors_map_to_the_public_error_enum() {
let Some(parser) = gpu_parser_or_skip("gpu_errors_map_to_the_public_error_enum") else {
return;
};
// Invalid UTF-8: the stray 0x80 byte sits at offset 2.
match parser.parse(b"ab\x80") {
Err(Error::Utf8 { offset: 2 }) => {}
other => panic!("utf8: {other:?}"),
}
// Two array elements without a comma; `true` starts at offset 3.
match parser.parse(b"[1 true]") {
Err(Error::Syntax {
offset: 3,
kind: SyntaxErrorKind::MissingComma,
}) => {}
other => panic!("missing comma: {other:?}"),
}
// Unclosed array; the expected offset is that of the opening bracket.
match parser.parse(b"[1") {
Err(Error::Syntax {
offset: 0,
kind: SyntaxErrorKind::UnbalancedBrackets,
}) => {}
other => panic!("unbalanced: {other:?}"),
}
// Content after a complete root value; the comma is at offset 2.
match parser.parse(b"{},1") {
Err(Error::TrailingContent { offset: 2 }) => {}
other => panic!("trailing: {other:?}"),
}
// Leading zero in a number that starts at offset 1.
match parser.parse(b"[01]") {
Err(Error::Syntax {
offset: 1,
kind: SyntaxErrorKind::InvalidNumber,
}) => {}
other => panic!("number: {other:?}"),
}
// Unknown escape `\q`; the backslash is at offset 2.
match parser.parse(br#"["\q"]"#) {
Err(Error::Syntax {
offset: 2,
kind: SyntaxErrorKind::InvalidStringEscape,
}) => {}
other => panic!("escape: {other:?}"),
}
// Raw control byte 0x01 inside a string, at offset 3.
match parser.parse(b"[\"a\x01\"]") {
Err(Error::Syntax {
offset: 3,
kind: SyntaxErrorKind::ControlCharacterInString,
}) => {}
other => panic!("control: {other:?}"),
}
// String without a closing quote; the expected offset equals the input length.
match parser.parse(b"\"abc") {
Err(Error::Syntax {
offset: 4,
kind: SyntaxErrorKind::UnterminatedString,
}) => {}
other => panic!("unterminated: {other:?}"),
}
// Depth limit: with max_depth 3 the fourth `[` (offset 3) is rejected.
let deep = Parser::with_options(ParserOptions {
max_depth: 3,
..ParserOptions::default()
})
.expect("device exists: gpu_parser_or_skip succeeded above");
match deep.parse(b"[[[[]]]]") {
Err(Error::DepthLimit {
offset: 3,
limit: 3,
}) => {}
other => panic!("depth: {other:?}"),
}
}
/// A path that does not exist surfaces as `Error::Io` from `parse_file`.
#[test]
fn parse_file_surfaces_io_errors() {
    if let Some(parser) = gpu_parser_or_skip("parse_file_surfaces_io_errors") {
        let outcome = parser.parse_file("/nonexistent/metal-json-no-such-file.json");
        let err = outcome.unwrap_err();
        assert!(matches!(err, Error::Io(_)), "got {err:?}");
    }
}
#[cfg(feature = "cpu-reference")]
mod vs_reference {
use super::*;
use crate::value::{Value, ValueKind};
/// Parser pinned to the CPU reference backend, with every other option at its default.
fn cpu_parser() -> Parser {
    let mut opts = ParserOptions::default();
    opts.backend = Backend::CpuReference;
    let built = Parser::with_options(opts);
    built.expect("CPU reference parser construction cannot fail")
}
/// CPU reference parser with an explicit depth limit.
fn cpu_parser_with_depth(max_depth: u32) -> Parser {
    let backend = Backend::CpuReference;
    Parser::with_options(ParserOptions { backend, max_depth })
        .expect("CPU reference parser construction cannot fail")
}
/// Recursively asserts that two values agree in kind and content.
///
/// `path` names the position inside the document and prefixes failure messages.
/// Doubles are compared by bit pattern.
fn assert_values_eq(gpu: Value<'_>, cpu: Value<'_>, path: &str) {
    assert_eq!(gpu.kind(), cpu.kind(), "{path}: kind");
    match cpu.kind() {
        ValueKind::Null => assert!(gpu.is_null(), "{path}"),
        ValueKind::Bool => assert_eq!(gpu.as_bool(), cpu.as_bool(), "{path}"),
        ValueKind::Int64 => assert_eq!(gpu.as_i64(), cpu.as_i64(), "{path}"),
        ValueKind::UInt64 => assert_eq!(gpu.as_u64(), cpu.as_u64(), "{path}"),
        ValueKind::Double => {
            let gpu_bits = gpu.as_f64().map(f64::to_bits);
            let cpu_bits = cpu.as_f64().map(f64::to_bits);
            assert_eq!(gpu_bits, cpu_bits, "{path}: f64 bits");
        }
        ValueKind::String => assert_eq!(gpu.as_str(), cpu.as_str(), "{path}"),
        ValueKind::Array => {
            assert_eq!(gpu.len(), cpu.len(), "{path}: array len");
            let pairs = gpu.elements().zip(cpu.elements());
            for (index, (gpu_item, cpu_item)) in pairs.enumerate() {
                let child_path = format!("{path}[{index}]");
                assert_values_eq(gpu_item, cpu_item, &child_path);
            }
        }
        ValueKind::Object => {
            assert_eq!(gpu.len(), cpu.len(), "{path}: object len");
            let pairs = gpu.entries().zip(cpu.entries());
            for (index, ((gpu_key, gpu_value), (cpu_key, cpu_value))) in pairs.enumerate() {
                assert_eq!(gpu_key, cpu_key, "{path}: key #{index}");
                let child_path = format!("{path}.{cpu_key}");
                assert_values_eq(gpu_value, cpu_value, &child_path);
            }
        }
    }
}
/// Parses `input` on both backends and requires the same verdict.
///
/// When both accept, the tapes must be identical and the value trees equal. When
/// both reject, the errors themselves are not compared here.
fn diff_doc(gpu: &Parser, cpu: &Parser, input: &[u8], label: &str) {
    let gpu_outcome = gpu.parse(input);
    let cpu_outcome = cpu.parse(input);
    match (gpu_outcome, cpu_outcome) {
        (Err(_), Err(_)) => {}
        (Ok(_), Err(e)) => panic!("{label}: GPU accepted, reference rejected ({e})"),
        (Err(e), Ok(_)) => panic!("{label}: GPU rejected ({e}), reference accepted"),
        (Ok(gpu_doc), Ok(cpu_doc)) => {
            assert_eq!(
                gpu_doc.tape(),
                cpu_doc.tape(),
                "{label}: tape words must be bit-identical"
            );
            assert_values_eq(gpu_doc.root(), cpu_doc.root(), label);
        }
    }
}
/// Every `*.json` fixture under `corpus/` must parse on both backends with identical
/// tapes and equal value trees.
#[test]
fn corpus_documents_match_the_reference_backend() {
    let Some(gpu) = gpu_parser_or_skip("corpus_documents_match_the_reference_backend") else {
        return;
    };
    let cpu = cpu_parser();
    let corpus_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("corpus");

    // Collect the fixtures, then sort them so the run order is deterministic.
    let listing = std::fs::read_dir(&corpus_dir).expect("corpus/ is checked in");
    let mut fixtures = Vec::new();
    for entry in listing {
        let fixture = entry.expect("readable corpus entry").path();
        if fixture.extension().is_some_and(|ext| ext == "json") {
            fixtures.push(fixture);
        }
    }
    fixtures.sort();
    assert!(!fixtures.is_empty(), "corpus/ must contain fixtures");

    for fixture in &fixtures {
        let name = fixture.file_name().unwrap().to_string_lossy().into_owned();
        let bytes = std::fs::read(fixture).expect("readable corpus fixture");
        let gpu_doc = match gpu.parse(&bytes) {
            Ok(doc) => doc,
            Err(e) => panic!("{name}: corpus fixture must parse on GPU: {e}"),
        };
        let cpu_doc = cpu.parse(&bytes).expect("corpus parses on the reference");
        assert_eq!(gpu_doc.tape(), cpu_doc.tape(), "{name}: tape");
        assert_values_eq(gpu_doc.root(), cpu_doc.root(), &name);
    }
}
/// Long decimal literals embedded in a full document must agree with the reference
/// backend; the two `ties` entries must read back as exactly 1.0 and -1.0.
#[test]
fn fixup_numbers_inside_full_documents_match_the_reference() {
    let Some(gpu) =
        gpu_parser_or_skip("fixup_numbers_inside_full_documents_match_the_reference")
    else {
        return;
    };
    let cpu = cpu_parser();

    let tie = "1.00000000000000011102230246251565404236316680908203125";
    let subnormal = "2.22507385850720113605740979670913197593481954635164564e-308";
    let json = format!(
        r#"{{"ties":[{tie},-{tie}],"subnormal":{subnormal},"mix":[1,"s",2.5e10]}}"#
    );
    let bytes = json.as_bytes();
    diff_doc(&gpu, &cpu, bytes, "fixup document");

    let doc = gpu.parse(bytes).unwrap();
    let ties = doc.root().get("ties").unwrap();
    assert_eq!(ties.at(0).unwrap().as_f64(), Some(1.0));
    let negative_one_bits = (-1.0f64).to_bits();
    assert_eq!(
        ties.at(1).unwrap().as_f64().map(f64::to_bits),
        Some(negative_one_bits)
    );
}
/// A document with two defects is rejected by both backends, but each is expected to
/// report a different one: the GPU the escape at offset 6, the reference the number
/// at offset 14.
#[test]
fn multi_error_documents_reject_on_both_backends() {
    let Some(gpu) = gpu_parser_or_skip("multi_error_documents_reject_on_both_backends") else {
        return;
    };
    let cpu = cpu_parser();
    let input = br#"{"a":"\q","b":01}"#;

    let gpu_outcome = gpu.parse(input);
    let gpu_reports_escape = matches!(
        gpu_outcome,
        Err(Error::Syntax {
            offset: 6,
            kind: SyntaxErrorKind::InvalidStringEscape,
        })
    );
    if !gpu_reports_escape {
        panic!("GPU: earliest offset (escape at 6), got {gpu_outcome:?}");
    }

    let cpu_outcome = cpu.parse(input);
    let cpu_reports_number = matches!(
        cpu_outcome,
        Err(Error::Syntax {
            offset: 14,
            kind: SyntaxErrorKind::InvalidNumber,
        })
    );
    if !cpu_reports_number {
        panic!("reference: stage order (number at 14), got {cpu_outcome:?}");
    }

    diff_doc(&gpu, &cpu, br#"[01, 0x2]"#, "two number errors");
}
/// Inputs with exactly one defect must yield the same error (compared through its
/// `Debug` rendering) on both backends, including a depth-limit violation.
#[test]
fn single_error_inputs_match_reference_code_and_offset() {
    let Some(gpu) = gpu_parser_or_skip("single_error_inputs_match_reference_code_and_offset")
    else {
        return;
    };
    let cpu = cpu_parser();
    let cases: &[&[u8]] = &[
        b"ab\x80",
        b"[1 true]",
        br#"{"a":1,2}"#,
        b"]",
        b"nul",
        b"[1",
        b"{},1",
        b"[01]",
        br#"{"k":1e999}"#,
        br#"["\q"]"#,
        b"[\"a\x01\"]",
        b"",
    ];
    for input in cases.iter().copied() {
        let gpu_err = gpu.parse(input).expect_err("rejects on GPU");
        let cpu_err = cpu.parse(input).expect_err("rejects on reference");
        let gpu_rendered = format!("{gpu_err:?}");
        let cpu_rendered = format!("{cpu_err:?}");
        assert_eq!(
            gpu_rendered,
            cpu_rendered,
            "{:?}: error parity",
            String::from_utf8_lossy(input)
        );
    }

    // Same comparison for the depth limit, with both parsers capped at depth 2.
    let mut shallow = ParserOptions::default();
    shallow.max_depth = 2;
    let gpu_deep = Parser::with_options(shallow).expect("device exists");
    let cpu_deep = cpu_parser_with_depth(2);
    let gpu_err = gpu_deep.parse(b"[[[]]]").expect_err("too deep");
    let cpu_err = cpu_deep.parse(b"[[[]]]").expect_err("too deep");
    assert_eq!(format!("{gpu_err:?}"), format!("{cpu_err:?}"));
}
/// Duplicate keys are all kept (length 3) and `get` returns the first occurrence;
/// the GPU result must also match the reference backend.
#[test]
fn duplicate_keys_match_the_reference() {
    let Some(gpu) = gpu_parser_or_skip("duplicate_keys_match_the_reference") else {
        return;
    };
    let cpu = cpu_parser();
    let input = br#"{"k":1,"k":2,"k":3}"#;
    diff_doc(&gpu, &cpu, input, "duplicate keys");

    let doc = gpu.parse(input).unwrap();
    let root = doc.root();
    assert_eq!(root.len(), Some(3));
    assert_eq!(root.get("k").unwrap().as_i64(), Some(1));
}
}
}