Skip to main content

uor_addr/json/
value.rs

1//! JSON typed input (ADR-023 amended by ADR-060) with JCS-RFC8785 +
2//! Unicode NFC canonical-form byte output.
3//!
4//! JSON canonicalization is **not** a streaming transform: JCS-RFC8785
5//! §3.2.3 sorts object members lexicographically by key, which needs
6//! per-object storage. The realization therefore materializes the
7//! canonical form once, in an `alloc` buffer ([`canonicalize`]), with
//! **no** configured width / count ceilings: string widths, number widths,
//! object-key counts, array-element counts, and total size are unbounded
//! (each tagged length / count field is still a `u32`; see the layout below).
10//! The handle then flows through the pipeline as a zero-copy
11//! [`TermValue::Borrowed`] carrier over those canonical bytes, and ψ₉
12//! folds them through the σ-axis. The single retained bound is
13//! [`MAX_JSON_DEPTH`](crate::json::shapes::bounds::MAX_JSON_DEPTH), a
14//! native-stack overflow guard on the recursive parser/canonicalizer.
15//!
16//! [`JsonValue`] (the owned parsed value, `alloc`-gated) holds the
17//! structurally-tagged byte form and backs the [`JsonValueRef`] navigator
18//! used by the schema-pinned descendants; [`JsonCarrier`] is the borrowed
19//! model-input handle the pipeline binds.
20//!
21//! # Tagged byte layout
22//!
23//! ```text
24//! JsonValue ::= Tag(1 byte) Payload
25//!   Tag = 0x00 Null         — no payload
26//!   Tag = 0x01 BoolFalse    — no payload
27//!   Tag = 0x02 BoolTrue     — no payload
28//!   Tag = 0x03 Number       — u32 BE length || N bytes (canonical ASCII)
29//!   Tag = 0x04 String       — u32 BE length || N bytes (UTF-8, NFC)
30//!   Tag = 0x05 Array        — u32 BE count  || count × JsonValue
31//!   Tag = 0x06 Object       — u32 BE count  || count × (u32 BE keylen || key || JsonValue)
32//! ```
33//!
34//! All multi-byte length / count fields are big-endian. Strings and
35//! object keys are NFC-normalized at parse time, so the canonical-form
36//! emitter is purely structural — it sorts object entries by NFC byte
37//! order and emits JCS syntax around already-canonical content.
38
39use prism::operation::TermValue;
40use prism::pipeline::{
41    ConstrainedTypeShape, ConstraintRef, IntoBindingValue, PartitionProductFields,
42};
43
44// ─── Tag byte constants (consumed only by the alloc-gated parser /
45//     canonicalizer / navigator) ──────────────────────────────────────────
46
/// Tag byte for JSON `null` (no payload).
#[cfg(feature = "alloc")]
pub(crate) const TAG_NULL: u8 = 0x00;
/// Tag byte for JSON `false` (no payload).
#[cfg(feature = "alloc")]
pub(crate) const TAG_FALSE: u8 = 0x01;
/// Tag byte for JSON `true` (no payload).
#[cfg(feature = "alloc")]
pub(crate) const TAG_TRUE: u8 = 0x02;
/// Tag byte for a number: `u32` BE length followed by the canonical ASCII text.
#[cfg(feature = "alloc")]
pub(crate) const TAG_NUMBER: u8 = 0x03;
/// Tag byte for a string: `u32` BE length followed by NFC-normalized UTF-8.
#[cfg(feature = "alloc")]
pub(crate) const TAG_STRING: u8 = 0x04;
/// Tag byte for an array: `u32` BE element count followed by the elements.
#[cfg(feature = "alloc")]
pub(crate) const TAG_ARRAY: u8 = 0x05;
/// Tag byte for an object: `u32` BE member count followed by
/// `(u32 BE keylen || key || value)` entries.
#[cfg(feature = "alloc")]
pub(crate) const TAG_OBJECT: u8 = 0x06;
61
62// ─── ShapeViolation IRIs (alloc-gated parser) ───────────────────────────
63
/// Violation returned by the parser whenever the input is rejected as JSON,
/// and by the tagged-byte readers when a tagged buffer is truncated or
/// carries an unknown tag.
#[cfg(feature = "alloc")]
const INVALID_JSON_VIOLATION: prism::pipeline::ShapeViolation = prism::pipeline::ShapeViolation {
    shape_iri: "https://uor.foundation/addr/JsonValue",
    constraint_iri: "https://uor.foundation/addr/JsonValue/validUtf8Json",
    property_iri: "https://uor.foundation/addr/inputBytes",
    expected_range: "https://uor.foundation/addr/ValidUtf8Json",
    min_count: 0,
    max_count: 1,
    kind: prism::pipeline::ViolationKind::ValueCheck,
};
74
/// Violation returned by the recursive parser when array / object nesting
/// goes deeper than `MAX_JSON_DEPTH`; `max_count` reports that bound.
#[cfg(feature = "alloc")]
const DEPTH_BOUND_VIOLATION: prism::pipeline::ShapeViolation = prism::pipeline::ShapeViolation {
    shape_iri: "https://uor.foundation/addr/JsonValue",
    constraint_iri: "https://uor.foundation/addr/JsonValue/depthBound",
    property_iri: "https://uor.foundation/addr/JsonValue/depth",
    expected_range: "http://www.w3.org/2001/XMLSchema#nonNegativeInteger",
    min_count: 0,
    // NOTE(review): `as u32` would truncate a bound above `u32::MAX`;
    // presumably MAX_JSON_DEPTH is small — confirm at its definition.
    max_count: crate::json::shapes::bounds::MAX_JSON_DEPTH as u32,
    kind: prism::pipeline::ViolationKind::CardinalityViolation,
};
85
86// ─── JsonCarrier — the borrowed model-input handle (no_alloc) ───────────
87
/// Model-input handle borrowing canonical-JSON bytes (ADR-060 borrowed
/// carrier). The handle is a `Copy` wrapper around a byte slice — normally
/// the output of [`canonicalize`] — and `as_binding_value` hands that slice
/// to the pipeline as a `Borrowed` carrier without copying it.
#[derive(Clone, Copy, Debug)]
pub struct JsonCarrier<'a>(&'a [u8]);

impl<'a> JsonCarrier<'a> {
    /// Build a handle over `canonical_bytes`. The bytes are stored as given;
    /// no validation or canonicalization happens here.
    #[must_use]
    pub fn new(canonical_bytes: &'a [u8]) -> Self {
        JsonCarrier(canonical_bytes)
    }

    /// Return the wrapped canonical-JSON bytes with their original lifetime.
    #[must_use]
    pub fn canonical_bytes(&self) -> &'a [u8] {
        let JsonCarrier(bytes) = *self;
        bytes
    }
}
107
/// Shape metadata for the carrier: it shares the `JsonValue` IRI, declares
/// a single site, and carries no constraint references.
impl ConstrainedTypeShape for JsonCarrier<'_> {
    const IRI: &'static str = "https://uor.foundation/addr/JsonValue";
    const SITE_COUNT: usize = 1;
    const CONSTRAINTS: &'static [ConstraintRef] = &[];
    // NOTE(review): `u64::MAX` presumably marks an unbounded cycle size —
    // confirm against the `ConstrainedTypeShape` documentation.
    const CYCLE_SIZE: u64 = u64::MAX;
}
114
// Opt the carrier into prism's `__sdk_seal::Sealed` marker trait.
// NOTE(review): presumably a prerequisite for the sealed pipeline traits
// implemented on this type — confirm against prism.
impl prism::uor_foundation::pipeline::__sdk_seal::Sealed for JsonCarrier<'_> {}
116
/// Binds the carrier into the pipeline by borrowing its bytes.
impl<'a> IntoBindingValue<'a> for JsonCarrier<'a> {
    /// Wrap the borrowed canonical bytes via `TermValue::borrowed`; the
    /// returned value keeps the carrier's `'a` lifetime.
    fn as_binding_value<const INLINE_BYTES: usize>(&self) -> TermValue<'a, INLINE_BYTES> {
        TermValue::borrowed(self.0)
    }
}
122
/// The carrier declares no partition-product fields: both tables are empty.
impl PartitionProductFields for JsonCarrier<'_> {
    const FIELDS: &'static [(u32, u32)] = &[];
    const FIELD_NAMES: &'static [&'static str] = &[];
}
127
128// ═════════════════════════════════════════════════════════════════════
129// alloc-gated parser, canonicalizer, owned value, and navigator
130// ═════════════════════════════════════════════════════════════════════
131
132#[cfg(feature = "alloc")]
133pub use alloc_impl::{canonicalize, ArrayIter, JsonValue, JsonValueRef, ObjectIter};
134
135#[cfg(feature = "alloc")]
136mod alloc_impl {
137    use super::*;
138    use crate::canonical::nfc;
139    use crate::json::shapes::bounds::MAX_JSON_DEPTH;
140    use alloc::vec::Vec;
141    use prism::pipeline::ShapeViolation;
142
143    // ─── JsonValue — the owned parsed value ─────────────────────────────
144
    /// Owned parsed JSON value, holding the structurally-tagged byte form
    /// documented in the [module header](super). Backs the
    /// [`JsonValueRef`] navigator. No configured width / count ceiling is
    /// applied; nesting depth is limited by [`MAX_JSON_DEPTH`] while
    /// parsing. **`alloc`-gated** — the pipeline binds the borrowed
    /// [`JsonCarrier`] handle.
    #[derive(Clone, PartialEq, Eq)]
    pub struct JsonValue {
        /// Tagged serialization of the value, written by the parser.
        pub(crate) bytes: Vec<u8>,
    }
154
155    impl core::fmt::Debug for JsonValue {
156        fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
157            f.debug_struct("JsonValue")
158                .field("len", &self.bytes.len())
159                .finish_non_exhaustive()
160        }
161    }
162
163    impl JsonValue {
164        /// Parse raw JSON bytes into a typed `JsonValue` (RFC 8259 syntax,
165        /// escape decoding, UAX #15 NFC normalization, JCS number
166        /// canonicalization).
167        ///
168        /// # Errors
169        ///
170        /// - `validUtf8Json` — input is not valid UTF-8 JSON.
171        /// - `depthBound` — nesting exceeds the [`MAX_JSON_DEPTH`]
172        ///   stack-safety bound.
173        pub fn parse(raw: &[u8]) -> Result<Self, ShapeViolation> {
174            let mut value = Self { bytes: Vec::new() };
175            let mut p = Parser::new(raw);
176            p.skip_ws();
177            parse_value(&mut p, &mut value, 0)?;
178            p.skip_ws();
179            if !p.is_eof() {
180                return Err(INVALID_JSON_VIOLATION);
181            }
182            Ok(value)
183        }
184
185        /// Borrow the structurally-tagged byte serialization (the runtime
186        /// form; **not** the canonical bytes ψ₉ hashes — derive those via
187        /// [`canonicalize`]).
188        #[must_use]
189        pub fn tagged_bytes(&self) -> &[u8] {
190            &self.bytes
191        }
192
193        fn push_byte(&mut self, b: u8) {
194            self.bytes.push(b);
195        }
196
197        fn push_u32_be(&mut self, v: u32) {
198            self.bytes.extend_from_slice(&v.to_be_bytes());
199        }
200
201        fn extend(&mut self, data: &[u8]) {
202            self.bytes.extend_from_slice(data);
203        }
204
205        fn patch_u32_be(&mut self, offset: usize, v: u32) {
206            self.bytes[offset..offset + 4].copy_from_slice(&v.to_be_bytes());
207        }
208    }
209
210    /// Parse + emit the JCS-RFC8785 + Unicode NFC canonical-form bytes.
211    ///
212    /// # Errors
213    ///
214    /// Surfaces any [`ShapeViolation`] [`JsonValue::parse`] would emit.
215    pub fn canonicalize(raw: &[u8]) -> Result<Vec<u8>, ShapeViolation> {
216        let value = JsonValue::parse(raw)?;
217        let mut out = Vec::new();
218        let mut pos = 0;
219        emit_value(value.tagged_bytes(), &mut pos, &mut out)?;
220        Ok(out)
221    }
222
223    // ─── Tokenizer ──────────────────────────────────────────────────────
224
    /// Byte cursor over the raw JSON input.
    struct Parser<'a> {
        /// The complete input being parsed.
        input: &'a [u8],
        /// Index of the next unread byte in `input`.
        pos: usize,
    }
229
230    impl<'a> Parser<'a> {
231        fn new(input: &'a [u8]) -> Self {
232            Self { input, pos: 0 }
233        }
234        fn is_eof(&self) -> bool {
235            self.pos >= self.input.len()
236        }
237        fn peek(&self) -> Result<u8, ShapeViolation> {
238            if self.is_eof() {
239                return Err(INVALID_JSON_VIOLATION);
240            }
241            Ok(self.input[self.pos])
242        }
243        fn bump(&mut self) -> Result<u8, ShapeViolation> {
244            let b = self.peek()?;
245            self.pos += 1;
246            Ok(b)
247        }
248        fn skip_ws(&mut self) {
249            while self.pos < self.input.len() {
250                match self.input[self.pos] {
251                    b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1,
252                    _ => break,
253                }
254            }
255        }
256        fn expect(&mut self, byte: u8) -> Result<(), ShapeViolation> {
257            if self.bump()? != byte {
258                return Err(INVALID_JSON_VIOLATION);
259            }
260            Ok(())
261        }
262        fn expect_lit(&mut self, lit: &[u8]) -> Result<(), ShapeViolation> {
263            if self.pos + lit.len() > self.input.len()
264                || &self.input[self.pos..self.pos + lit.len()] != lit
265            {
266                return Err(INVALID_JSON_VIOLATION);
267            }
268            self.pos += lit.len();
269            Ok(())
270        }
271    }
272
273    fn parse_value(
274        p: &mut Parser<'_>,
275        out: &mut JsonValue,
276        depth: usize,
277    ) -> Result<(), ShapeViolation> {
278        if depth > MAX_JSON_DEPTH {
279            return Err(DEPTH_BOUND_VIOLATION);
280        }
281        p.skip_ws();
282        match p.peek()? {
283            b'n' => {
284                p.expect_lit(b"null")?;
285                out.push_byte(TAG_NULL);
286                Ok(())
287            }
288            b't' => {
289                p.expect_lit(b"true")?;
290                out.push_byte(TAG_TRUE);
291                Ok(())
292            }
293            b'f' => {
294                p.expect_lit(b"false")?;
295                out.push_byte(TAG_FALSE);
296                Ok(())
297            }
298            b'"' => parse_string(p, out),
299            b'-' | b'0'..=b'9' => parse_number(p, out),
300            b'[' => parse_array(p, out, depth + 1),
301            b'{' => parse_object(p, out, depth + 1),
302            _ => Err(INVALID_JSON_VIOLATION),
303        }
304    }
305
306    fn parse_array(
307        p: &mut Parser<'_>,
308        out: &mut JsonValue,
309        depth: usize,
310    ) -> Result<(), ShapeViolation> {
311        p.expect(b'[')?;
312        out.push_byte(TAG_ARRAY);
313        let count_pos = out.bytes.len();
314        out.push_u32_be(0);
315        let mut count: u32 = 0;
316        p.skip_ws();
317        if p.peek()? == b']' {
318            p.pos += 1;
319            return Ok(());
320        }
321        loop {
322            parse_value(p, out, depth)?;
323            count += 1;
324            p.skip_ws();
325            match p.bump()? {
326                b',' => {
327                    p.skip_ws();
328                    continue;
329                }
330                b']' => break,
331                _ => return Err(INVALID_JSON_VIOLATION),
332            }
333        }
334        out.patch_u32_be(count_pos, count);
335        Ok(())
336    }
337
338    fn parse_object(
339        p: &mut Parser<'_>,
340        out: &mut JsonValue,
341        depth: usize,
342    ) -> Result<(), ShapeViolation> {
343        p.expect(b'{')?;
344        out.push_byte(TAG_OBJECT);
345        let count_pos = out.bytes.len();
346        out.push_u32_be(0);
347        let mut count: u32 = 0;
348        p.skip_ws();
349        if p.peek()? == b'}' {
350            p.pos += 1;
351            return Ok(());
352        }
353        loop {
354            p.skip_ws();
355            if p.peek()? != b'"' {
356                return Err(INVALID_JSON_VIOLATION);
357            }
358            let key = decode_string_into_nfc(p)?;
359            out.push_u32_be(key.len() as u32);
360            out.extend(&key);
361            p.skip_ws();
362            p.expect(b':')?;
363            p.skip_ws();
364            parse_value(p, out, depth)?;
365            count += 1;
366            p.skip_ws();
367            match p.bump()? {
368                b',' => continue,
369                b'}' => break,
370                _ => return Err(INVALID_JSON_VIOLATION),
371            }
372        }
373        out.patch_u32_be(count_pos, count);
374        Ok(())
375    }
376
377    fn parse_string(p: &mut Parser<'_>, out: &mut JsonValue) -> Result<(), ShapeViolation> {
378        let s = decode_string_into_nfc(p)?;
379        out.push_byte(TAG_STRING);
380        out.push_u32_be(s.len() as u32);
381        out.extend(&s);
382        Ok(())
383    }
384
385    /// Decode a JSON string literal at the cursor (escape handling +
386    /// NFC normalization), returning the NFC-normalized UTF-8 bytes.
387    fn decode_string_into_nfc(p: &mut Parser<'_>) -> Result<Vec<u8>, ShapeViolation> {
388        p.expect(b'"')?;
389        let mut stage1 = Vec::new();
390        loop {
391            if p.is_eof() {
392                return Err(INVALID_JSON_VIOLATION);
393            }
394            let b = p.input[p.pos];
395            match b {
396                b'"' => {
397                    p.pos += 1;
398                    break;
399                }
400                b'\\' => {
401                    p.pos += 1;
402                    let esc = p.bump()?;
403                    match esc {
404                        b'"' => stage1.push(b'"'),
405                        b'\\' => stage1.push(b'\\'),
406                        b'/' => stage1.push(b'/'),
407                        b'b' => stage1.push(0x08),
408                        b'f' => stage1.push(0x0C),
409                        b'n' => stage1.push(0x0A),
410                        b'r' => stage1.push(0x0D),
411                        b't' => stage1.push(0x09),
412                        b'u' => {
413                            let cp = decode_u_escape(p)?;
414                            let c = char::from_u32(cp).ok_or(INVALID_JSON_VIOLATION)?;
415                            let mut tmp = [0u8; 4];
416                            stage1.extend_from_slice(c.encode_utf8(&mut tmp).as_bytes());
417                        }
418                        _ => return Err(INVALID_JSON_VIOLATION),
419                    }
420                }
421                // Unescaped control characters forbidden by RFC 8259 §7.
422                0x00..=0x1F => return Err(INVALID_JSON_VIOLATION),
423                _ => {
424                    stage1.push(b);
425                    p.pos += 1;
426                }
427            }
428        }
429        normalize_nfc(&stage1)
430    }
431
432    /// NFC-normalize `stage1` into an owned buffer, growing the output
433    /// allocation until it fits (UAX #15 NFC expansion is bounded, so the
434    /// loop runs at most a couple of iterations).
435    fn normalize_nfc(stage1: &[u8]) -> Result<Vec<u8>, ShapeViolation> {
436        let mut cap = stage1.len().saturating_mul(3).max(64);
437        loop {
438            let mut buf = alloc::vec![0u8; cap];
439            match nfc::normalize_into(stage1, &mut buf) {
440                Ok(n) => {
441                    buf.truncate(n);
442                    return Ok(buf);
443                }
444                Err(nfc::NfcError::OutputOverflow) => {
445                    cap = cap.saturating_mul(2);
446                }
447                Err(_) => return Err(INVALID_JSON_VIOLATION),
448            }
449        }
450    }
451
452    fn decode_u_escape(p: &mut Parser<'_>) -> Result<u32, ShapeViolation> {
453        let high = decode_hex4(p)?;
454        if (0xD800..=0xDBFF).contains(&high) {
455            if p.input.get(p.pos..p.pos + 2) != Some(b"\\u") {
456                return Err(INVALID_JSON_VIOLATION);
457            }
458            p.pos += 2;
459            let low = decode_hex4(p)?;
460            if !(0xDC00..=0xDFFF).contains(&low) {
461                return Err(INVALID_JSON_VIOLATION);
462            }
463            Ok(0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00))
464        } else if (0xDC00..=0xDFFF).contains(&high) {
465            Err(INVALID_JSON_VIOLATION)
466        } else {
467            Ok(high)
468        }
469    }
470
471    fn decode_hex4(p: &mut Parser<'_>) -> Result<u32, ShapeViolation> {
472        if p.pos + 4 > p.input.len() {
473            return Err(INVALID_JSON_VIOLATION);
474        }
475        let mut v: u32 = 0;
476        for _ in 0..4 {
477            let d = p.input[p.pos];
478            p.pos += 1;
479            let nibble = match d {
480                b'0'..=b'9' => (d - b'0') as u32,
481                b'a'..=b'f' => 10 + (d - b'a') as u32,
482                b'A'..=b'F' => 10 + (d - b'A') as u32,
483                _ => return Err(INVALID_JSON_VIOLATION),
484            };
485            v = (v << 4) | nibble;
486        }
487        Ok(v)
488    }
489
490    fn parse_number(p: &mut Parser<'_>, out: &mut JsonValue) -> Result<(), ShapeViolation> {
491        let start = p.pos;
492        let mut has_decimal = false;
493        let mut has_exponent = false;
494        if p.peek()? == b'-' {
495            p.pos += 1;
496        }
497        match p.peek()? {
498            b'0' => p.pos += 1,
499            b'1'..=b'9' => {
500                p.pos += 1;
501                while let Ok(b) = p.peek() {
502                    if b.is_ascii_digit() {
503                        p.pos += 1;
504                    } else {
505                        break;
506                    }
507                }
508            }
509            _ => return Err(INVALID_JSON_VIOLATION),
510        }
511        if p.peek().ok() == Some(b'.') {
512            has_decimal = true;
513            p.pos += 1;
514            let frac_start = p.pos;
515            while let Ok(b) = p.peek() {
516                if b.is_ascii_digit() {
517                    p.pos += 1;
518                } else {
519                    break;
520                }
521            }
522            if p.pos == frac_start {
523                return Err(INVALID_JSON_VIOLATION);
524            }
525        }
526        if let Ok(b) = p.peek() {
527            if b == b'e' || b == b'E' {
528                has_exponent = true;
529                p.pos += 1;
530                if let Ok(s) = p.peek() {
531                    if s == b'+' || s == b'-' {
532                        p.pos += 1;
533                    }
534                }
535                let exp_start = p.pos;
536                while let Ok(d) = p.peek() {
537                    if d.is_ascii_digit() {
538                        p.pos += 1;
539                    } else {
540                        break;
541                    }
542                }
543                if p.pos == exp_start {
544                    return Err(INVALID_JSON_VIOLATION);
545                }
546            }
547        }
548        let raw = &p.input[start..p.pos];
549        let canon = canonicalize_number(raw, has_decimal || has_exponent)?;
550        out.push_byte(TAG_NUMBER);
551        out.push_u32_be(canon.len() as u32);
552        out.extend(&canon);
553        Ok(())
554    }
555
556    /// Canonicalize a JSON number per JCS-RFC8785 §3.2.2.3 (ECMA-262
557    /// 7.1.12.1): integer-syntax literals pass through verbatim (RFC 8259
558    /// forbids leading zeros / explicit `+`, so they are already in
559    /// ECMA-262 ToString form), float-syntax literals (and `-0`) route
560    /// through `f64` + `ryu` shortest-round-trip.
561    fn canonicalize_number(raw: &[u8], is_float_syntax: bool) -> Result<Vec<u8>, ShapeViolation> {
562        let is_negative_zero = raw == b"-0";
563        if is_float_syntax || is_negative_zero {
564            let s = core::str::from_utf8(raw).map_err(|_| INVALID_JSON_VIOLATION)?;
565            let v: f64 = s.parse().map_err(|_| INVALID_JSON_VIOLATION)?;
566            let mut ryu_buf = ryu::Buffer::new();
567            Ok(ryu_buf.format(v).as_bytes().to_vec())
568        } else {
569            Ok(raw.to_vec())
570        }
571    }
572
573    // ─── JCS canonicalizer (tagged bytes → canonical bytes) ─────────────
574
575    fn read_byte(tagged: &[u8], pos: &mut usize) -> Result<u8, ShapeViolation> {
576        if *pos >= tagged.len() {
577            return Err(INVALID_JSON_VIOLATION);
578        }
579        let b = tagged[*pos];
580        *pos += 1;
581        Ok(b)
582    }
583
584    fn read_u32_be(tagged: &[u8], pos: &mut usize) -> Result<u32, ShapeViolation> {
585        if *pos + 4 > tagged.len() {
586            return Err(INVALID_JSON_VIOLATION);
587        }
588        let v = u32::from_be_bytes([
589            tagged[*pos],
590            tagged[*pos + 1],
591            tagged[*pos + 2],
592            tagged[*pos + 3],
593        ]);
594        *pos += 4;
595        Ok(v)
596    }
597
598    fn read_slice<'a>(
599        tagged: &'a [u8],
600        pos: &mut usize,
601        len: usize,
602    ) -> Result<&'a [u8], ShapeViolation> {
603        if *pos + len > tagged.len() {
604            return Err(INVALID_JSON_VIOLATION);
605        }
606        let s = &tagged[*pos..*pos + len];
607        *pos += len;
608        Ok(s)
609    }
610
611    fn emit_value(tagged: &[u8], pos: &mut usize, out: &mut Vec<u8>) -> Result<(), ShapeViolation> {
612        let tag = read_byte(tagged, pos)?;
613        match tag {
614            TAG_NULL => {
615                out.extend_from_slice(b"null");
616                Ok(())
617            }
618            TAG_FALSE => {
619                out.extend_from_slice(b"false");
620                Ok(())
621            }
622            TAG_TRUE => {
623                out.extend_from_slice(b"true");
624                Ok(())
625            }
626            TAG_NUMBER => {
627                let len = read_u32_be(tagged, pos)? as usize;
628                let bytes = read_slice(tagged, pos, len)?;
629                out.extend_from_slice(bytes);
630                Ok(())
631            }
632            TAG_STRING => {
633                let len = read_u32_be(tagged, pos)? as usize;
634                let bytes = read_slice(tagged, pos, len)?;
635                emit_json_string(bytes, out);
636                Ok(())
637            }
638            TAG_ARRAY => {
639                let count = read_u32_be(tagged, pos)? as usize;
640                out.push(b'[');
641                for i in 0..count {
642                    if i > 0 {
643                        out.push(b',');
644                    }
645                    emit_value(tagged, pos, out)?;
646                }
647                out.push(b']');
648                Ok(())
649            }
650            TAG_OBJECT => emit_object(tagged, pos, out),
651            _ => Err(INVALID_JSON_VIOLATION),
652        }
653    }
654
655    fn emit_object(
656        tagged: &[u8],
657        pos: &mut usize,
658        out: &mut Vec<u8>,
659    ) -> Result<(), ShapeViolation> {
660        let count = read_u32_be(tagged, pos)? as usize;
661        // Collect each member's entry offset (start of its u32 keylen),
662        // then stable-sort by NFC key bytes (== lexicographic, strings are
663        // pre-normalized at parse time).
664        let mut entries: Vec<usize> = Vec::with_capacity(count);
665        for _ in 0..count {
666            entries.push(*pos);
667            let key_len = read_u32_be(tagged, pos)? as usize;
668            *pos += key_len;
669            if *pos > tagged.len() {
670                return Err(INVALID_JSON_VIOLATION);
671            }
672            skip_value(tagged, pos)?;
673        }
674        entries.sort_by(|&a, &b| entry_key(a, tagged).cmp(entry_key(b, tagged)));
675        out.push(b'{');
676        for (i, &entry_off) in entries.iter().enumerate() {
677            if i > 0 {
678                out.push(b',');
679            }
680            let mut p = entry_off;
681            let key_len = read_u32_be(tagged, &mut p)? as usize;
682            let key_bytes = read_slice(tagged, &mut p, key_len)?;
683            emit_json_string(key_bytes, out);
684            out.push(b':');
685            emit_value(tagged, &mut p, out)?;
686        }
687        out.push(b'}');
688        Ok(())
689    }
690
691    fn entry_key(off: usize, tagged: &[u8]) -> &[u8] {
692        if off + 4 > tagged.len() {
693            return &[];
694        }
695        let key_len = u32::from_be_bytes([
696            tagged[off],
697            tagged[off + 1],
698            tagged[off + 2],
699            tagged[off + 3],
700        ]) as usize;
701        let start = off + 4;
702        if start + key_len > tagged.len() {
703            return &[];
704        }
705        &tagged[start..start + key_len]
706    }
707
708    fn skip_value(tagged: &[u8], pos: &mut usize) -> Result<(), ShapeViolation> {
709        let tag = read_byte(tagged, pos)?;
710        match tag {
711            TAG_NULL | TAG_FALSE | TAG_TRUE => Ok(()),
712            TAG_NUMBER | TAG_STRING => {
713                let len = read_u32_be(tagged, pos)? as usize;
714                *pos += len;
715                if *pos > tagged.len() {
716                    Err(INVALID_JSON_VIOLATION)
717                } else {
718                    Ok(())
719                }
720            }
721            TAG_ARRAY => {
722                let count = read_u32_be(tagged, pos)? as usize;
723                for _ in 0..count {
724                    skip_value(tagged, pos)?;
725                }
726                Ok(())
727            }
728            TAG_OBJECT => {
729                let count = read_u32_be(tagged, pos)? as usize;
730                for _ in 0..count {
731                    let key_len = read_u32_be(tagged, pos)? as usize;
732                    *pos += key_len;
733                    if *pos > tagged.len() {
734                        return Err(INVALID_JSON_VIOLATION);
735                    }
736                    skip_value(tagged, pos)?;
737                }
738                Ok(())
739            }
740            _ => Err(INVALID_JSON_VIOLATION),
741        }
742    }
743
744    /// Emit `bytes` as a JCS-compliant JSON string literal.
745    fn emit_json_string(bytes: &[u8], out: &mut Vec<u8>) {
746        out.push(b'"');
747        for &b in bytes {
748            match b {
749                b'"' => out.extend_from_slice(b"\\\""),
750                b'\\' => out.extend_from_slice(b"\\\\"),
751                0x08 => out.extend_from_slice(b"\\b"),
752                0x09 => out.extend_from_slice(b"\\t"),
753                0x0A => out.extend_from_slice(b"\\n"),
754                0x0C => out.extend_from_slice(b"\\f"),
755                0x0D => out.extend_from_slice(b"\\r"),
756                0x00..=0x1F => {
757                    out.extend_from_slice(b"\\u00");
758                    out.push(nibble_hex(b >> 4));
759                    out.push(nibble_hex(b & 0x0f));
760                }
761                _ => out.push(b),
762            }
763        }
764        out.push(b'"');
765    }
766
767    fn nibble_hex(n: u8) -> u8 {
768        match n {
769            0..=9 => b'0' + n,
770            10..=15 => b'a' + (n - 10),
771            _ => b'0',
772        }
773    }
774
775    // ─── JsonValueRef — tagged-byte navigator for schema admission ──────
776
    /// Zero-copy view into a tagged-byte JSON value (or sub-value), used
    /// by the schema-pinned descendants to validate JSON-LD admission
    /// predicates. Keys and string values are NFC-normalized; numbers
    /// carry their canonical ASCII text.
    #[derive(Clone, Copy)]
    pub struct JsonValueRef<'a> {
        /// The whole tagged buffer this view points into.
        tagged: &'a [u8],
        /// Index of this value's tag byte within `tagged`.
        offset: usize,
    }
786
787    impl<'a> JsonValueRef<'a> {
788        /// Root navigator over a parsed [`JsonValue`].
789        pub fn root(value: &'a JsonValue) -> Self {
790            Self {
791                tagged: value.tagged_bytes(),
792                offset: 0,
793            }
794        }
795
796        /// Tag byte at this position.
797        pub fn tag(&self) -> u8 {
798            self.tagged[self.offset]
799        }
800        pub fn is_null(&self) -> bool {
801            self.tag() == TAG_NULL
802        }
803        pub fn is_bool(&self) -> bool {
804            matches!(self.tag(), TAG_FALSE | TAG_TRUE)
805        }
806        pub fn is_number(&self) -> bool {
807            self.tag() == TAG_NUMBER
808        }
809        pub fn is_string(&self) -> bool {
810            self.tag() == TAG_STRING
811        }
812        pub fn is_array(&self) -> bool {
813            self.tag() == TAG_ARRAY
814        }
815        pub fn is_object(&self) -> bool {
816            self.tag() == TAG_OBJECT
817        }
818
819        pub fn as_bool(&self) -> Option<bool> {
820            match self.tag() {
821                TAG_FALSE => Some(false),
822                TAG_TRUE => Some(true),
823                _ => None,
824            }
825        }
826
827        /// Borrow the NFC-normalized UTF-8 content of a string value.
828        pub fn as_str(&self) -> Option<&'a [u8]> {
829            if !self.is_string() {
830                return None;
831            }
832            let mut p = self.offset + 1;
833            let len = read_u32_be(self.tagged, &mut p).ok()? as usize;
834            Some(&self.tagged[p..p + len])
835        }
836
837        /// Borrow the canonical ASCII text of a number value.
838        pub fn as_number_str(&self) -> Option<&'a [u8]> {
839            if !self.is_number() {
840                return None;
841            }
842            let mut p = self.offset + 1;
843            let len = read_u32_be(self.tagged, &mut p).ok()? as usize;
844            Some(&self.tagged[p..p + len])
845        }
846
847        /// Look up an object entry by its NFC key bytes.
848        pub fn get(&self, key: &[u8]) -> Option<JsonValueRef<'a>> {
849            let mut iter = self.iter_object()?;
850            iter.find_map(|(k, v)| if k == key { Some(v) } else { None })
851        }
852
853        /// Iterate object entries `(key_bytes, value_ref)` in tagged-form
854        /// (input) order.
855        pub fn iter_object(&self) -> Option<ObjectIter<'a>> {
856            if !self.is_object() {
857                return None;
858            }
859            let mut p = self.offset + 1;
860            let count = read_u32_be(self.tagged, &mut p).ok()? as usize;
861            Some(ObjectIter {
862                tagged: self.tagged,
863                pos: p,
864                remaining: count,
865            })
866        }
867
868        /// Iterate array elements.
869        pub fn iter_array(&self) -> Option<ArrayIter<'a>> {
870            if !self.is_array() {
871                return None;
872            }
873            let mut p = self.offset + 1;
874            let count = read_u32_be(self.tagged, &mut p).ok()? as usize;
875            Some(ArrayIter {
876                tagged: self.tagged,
877                pos: p,
878                remaining: count,
879            })
880        }
881    }
882
883    /// Iterator over an object's `(key_bytes, value)` entries.
884    pub struct ObjectIter<'a> {
885        tagged: &'a [u8],
886        pos: usize,
887        remaining: usize,
888    }
889
890    impl<'a> Iterator for ObjectIter<'a> {
891        type Item = (&'a [u8], JsonValueRef<'a>);
892        fn next(&mut self) -> Option<Self::Item> {
893            if self.remaining == 0 {
894                return None;
895            }
896            let key_len = read_u32_be(self.tagged, &mut self.pos).ok()? as usize;
897            let key_end = self.pos + key_len;
898            let key = &self.tagged[self.pos..key_end];
899            self.pos = key_end;
900            let value_offset = self.pos;
901            self.pos = skip_to_end(self.tagged, self.pos).ok()?;
902            self.remaining -= 1;
903            Some((
904                key,
905                JsonValueRef {
906                    tagged: self.tagged,
907                    offset: value_offset,
908                },
909            ))
910        }
911    }
912
913    /// Iterator over an array's elements.
914    pub struct ArrayIter<'a> {
915        tagged: &'a [u8],
916        pos: usize,
917        remaining: usize,
918    }
919
920    impl<'a> Iterator for ArrayIter<'a> {
921        type Item = JsonValueRef<'a>;
922        fn next(&mut self) -> Option<Self::Item> {
923            if self.remaining == 0 {
924                return None;
925            }
926            let value_offset = self.pos;
927            self.pos = skip_to_end(self.tagged, self.pos).ok()?;
928            self.remaining -= 1;
929            Some(JsonValueRef {
930                tagged: self.tagged,
931                offset: value_offset,
932            })
933        }
934    }
935
936    fn skip_to_end(tagged: &[u8], pos: usize) -> Result<usize, ShapeViolation> {
937        let mut p = pos;
938        let tag = read_byte(tagged, &mut p)?;
939        match tag {
940            TAG_NULL | TAG_FALSE | TAG_TRUE => Ok(p),
941            TAG_NUMBER | TAG_STRING => {
942                let len = read_u32_be(tagged, &mut p)? as usize;
943                Ok(p + len)
944            }
945            TAG_ARRAY => {
946                let count = read_u32_be(tagged, &mut p)? as usize;
947                for _ in 0..count {
948                    p = skip_to_end(tagged, p)?;
949                }
950                Ok(p)
951            }
952            TAG_OBJECT => {
953                let count = read_u32_be(tagged, &mut p)? as usize;
954                for _ in 0..count {
955                    let key_len = read_u32_be(tagged, &mut p)? as usize;
956                    p += key_len;
957                    p = skip_to_end(tagged, p)?;
958                }
959                Ok(p)
960            }
961            _ => Err(INVALID_JSON_VIOLATION),
962        }
963    }
964
965    #[cfg(test)]
966    mod tests {
967        use super::*;
968
969        #[test]
970        fn parses_simple_object() {
971            let v = JsonValue::parse(br#"{"foo":"bar"}"#).expect("valid");
972            assert_eq!(v.bytes[0], TAG_OBJECT);
973        }
974
975        #[test]
976        fn rejects_invalid_json() {
977            let err = JsonValue::parse(b"not json").expect_err("must reject");
978            assert_eq!(err.shape_iri, INVALID_JSON_VIOLATION.shape_iri);
979        }
980
981        #[test]
982        fn rejects_overdeep_recursion() {
983            use alloc::string::String;
984            let mut s = String::new();
985            for _ in 0..(MAX_JSON_DEPTH + 2) {
986                s.push('[');
987            }
988            for _ in 0..(MAX_JSON_DEPTH + 2) {
989                s.push(']');
990            }
991            let err = JsonValue::parse(s.as_bytes()).expect_err("must reject");
992            assert_eq!(err.constraint_iri, DEPTH_BOUND_VIOLATION.constraint_iri);
993        }
994
995        #[test]
996        fn accepts_unbounded_string_width() {
997            use alloc::format;
998            use alloc::string::String;
999            let big: String = "a".repeat(200_000);
1000            let raw = format!("\"{big}\"");
1001            let canon = canonicalize(raw.as_bytes()).expect("unbounded string admitted");
1002            assert_eq!(canon.len(), big.len() + 2);
1003        }
1004
1005        const CANONICAL_FIXTURES: &[(&[u8], &[u8])] = &[
1006            (br#"{"foo":"bar"}"#, br#"{"foo":"bar"}"#),
1007            (br#"{"b": 1, "a": 2}"#, br#"{"a":2,"b":1}"#),
1008            (
1009                br#"{"nested": {"deep": {"value": "found"}}}"#,
1010                br#"{"nested":{"deep":{"value":"found"}}}"#,
1011            ),
1012            (
1013                br#"{"int": 42, "bool": true, "null_val": null}"#,
1014                br#"{"bool":true,"int":42,"null_val":null}"#,
1015            ),
1016            (b"[1, 2, 3]", b"[1,2,3]"),
1017            (br#"["a", "b", "c"]"#, br#"["a","b","c"]"#),
1018        ];
1019
1020        #[test]
1021        fn canonicalizer_matches_reference_for_inline_fixtures() {
1022            for (raw, expected) in CANONICAL_FIXTURES {
1023                let canon = canonicalize(raw).expect("valid");
1024                assert_eq!(canon, *expected, "raw={raw:?}");
1025            }
1026        }
1027
1028        #[test]
1029        fn canonicalizer_collapses_unicode_decomposed_to_composed() {
1030            let decomposed = "{\"name\": \"cafe\u{0301}\"}".as_bytes();
1031            let composed = "{\"name\":\"caf\u{00E9}\"}".as_bytes();
1032            assert_eq!(
1033                canonicalize(decomposed).expect("valid"),
1034                canonicalize(composed).expect("valid")
1035            );
1036        }
1037
1038        #[test]
1039        fn canonicalize_is_idempotent_on_its_own_output() {
1040            for (raw, _expected) in CANONICAL_FIXTURES {
1041                let once = canonicalize(raw).expect("valid");
1042                let twice = canonicalize(&once).expect("re-canonicalises");
1043                assert_eq!(once, twice, "idempotence broken for {raw:?}");
1044            }
1045        }
1046    }
1047}