1use prism::operation::TermValue;
40use prism::pipeline::{
41 ConstrainedTypeShape, ConstraintRef, IntoBindingValue, PartitionProductFields,
42};
43
// Tag bytes of the flat, tagged encoding built by the parser in `alloc_impl`.
// Each encoded value starts with exactly one of these bytes.

/// Tag byte written for the JSON literal `null`; no payload follows.
#[cfg(feature = "alloc")]
pub(crate) const TAG_NULL: u8 = 0x00;
/// Tag byte written for the JSON literal `false`; no payload follows.
#[cfg(feature = "alloc")]
pub(crate) const TAG_FALSE: u8 = 0x01;
/// Tag byte written for the JSON literal `true`; no payload follows.
#[cfg(feature = "alloc")]
pub(crate) const TAG_TRUE: u8 = 0x02;
/// Tag byte for a number; followed by a big-endian `u32` byte length and the
/// number's canonical text.
#[cfg(feature = "alloc")]
pub(crate) const TAG_NUMBER: u8 = 0x03;
/// Tag byte for a string; followed by a big-endian `u32` byte length and the
/// decoded, NFC-normalised string bytes.
#[cfg(feature = "alloc")]
pub(crate) const TAG_STRING: u8 = 0x04;
/// Tag byte for an array; followed by a big-endian `u32` element count and
/// the encoded elements.
#[cfg(feature = "alloc")]
pub(crate) const TAG_ARRAY: u8 = 0x05;
/// Tag byte for an object; followed by a big-endian `u32` member count and,
/// per member, a length-prefixed key (no tag byte) and the encoded value.
#[cfg(feature = "alloc")]
pub(crate) const TAG_OBJECT: u8 = 0x06;
61
/// Violation returned whenever the input is not acceptable JSON: syntax
/// errors, bad escapes, NFC normalisation failures, and malformed tagged
/// streams all map to this single value.
#[cfg(feature = "alloc")]
const INVALID_JSON_VIOLATION: prism::pipeline::ShapeViolation = prism::pipeline::ShapeViolation {
    // Same IRI as `<JsonCarrier as ConstrainedTypeShape>::IRI`.
    shape_iri: "https://uor.foundation/addr/JsonValue",
    constraint_iri: "https://uor.foundation/addr/JsonValue/validUtf8Json",
    property_iri: "https://uor.foundation/addr/inputBytes",
    expected_range: "https://uor.foundation/addr/ValidUtf8Json",
    min_count: 0,
    max_count: 1,
    kind: prism::pipeline::ViolationKind::ValueCheck,
};
74
/// Violation returned by the parser when the nesting depth of arrays and
/// objects exceeds `MAX_JSON_DEPTH`.
#[cfg(feature = "alloc")]
const DEPTH_BOUND_VIOLATION: prism::pipeline::ShapeViolation = prism::pipeline::ShapeViolation {
    shape_iri: "https://uor.foundation/addr/JsonValue",
    constraint_iri: "https://uor.foundation/addr/JsonValue/depthBound",
    property_iri: "https://uor.foundation/addr/JsonValue/depth",
    expected_range: "http://www.w3.org/2001/XMLSchema#nonNegativeInteger",
    min_count: 0,
    // The reported upper bound is the configured depth limit itself.
    max_count: crate::json::shapes::bounds::MAX_JSON_DEPTH as u32,
    kind: prism::pipeline::ViolationKind::CardinalityViolation,
};
85
/// Cheap, copyable view over a borrowed byte slice.
///
/// The wrapper stores the slice exactly as given; it neither copies nor
/// inspects the bytes.
#[derive(Clone, Copy, Debug)]
pub struct JsonCarrier<'a>(&'a [u8]);

impl<'a> JsonCarrier<'a> {
    /// Wraps `canonical_bytes` without copying or validating them.
    #[must_use]
    pub fn new(canonical_bytes: &'a [u8]) -> Self {
        JsonCarrier(canonical_bytes)
    }

    /// Returns the wrapped slice with its full original lifetime `'a`.
    #[must_use]
    pub fn canonical_bytes(&self) -> &'a [u8] {
        let JsonCarrier(bytes) = *self;
        bytes
    }
}
107
/// Shape metadata for [`JsonCarrier`].
///
/// NOTE(review): the meaning of each constant is defined by
/// `prism::pipeline::ConstrainedTypeShape`, which is not visible here —
/// confirm against that trait's documentation.
impl ConstrainedTypeShape for JsonCarrier<'_> {
    // Same IRI string that the JSON violation constants report as `shape_iri`.
    const IRI: &'static str = "https://uor.foundation/addr/JsonValue";
    const SITE_COUNT: usize = 1;
    // No constraint references are declared for this shape.
    const CONSTRAINTS: &'static [ConstraintRef] = &[];
    // Largest representable value; presumably means "unbounded" — TODO confirm.
    const CYCLE_SIZE: u64 = u64::MAX;
}
114
// Marker impl with no items. Presumably required by the sealed `prism` traits
// implemented for `JsonCarrier` in this file — confirm against the SDK.
impl prism::uor_foundation::pipeline::__sdk_seal::Sealed for JsonCarrier<'_> {}
116
117impl<'a> IntoBindingValue<'a> for JsonCarrier<'a> {
118 fn as_binding_value<const INLINE_BYTES: usize>(&self) -> TermValue<'a, INLINE_BYTES> {
119 TermValue::borrowed(self.0)
120 }
121}
122
/// `JsonCarrier` declares no partition-product fields: both tables are empty.
impl PartitionProductFields for JsonCarrier<'_> {
    const FIELDS: &'static [(u32, u32)] = &[];
    const FIELD_NAMES: &'static [&'static str] = &[];
}
127
128#[cfg(feature = "alloc")]
133pub use alloc_impl::{canonicalize, ArrayIter, JsonValue, JsonValueRef, ObjectIter};
134
135#[cfg(feature = "alloc")]
136mod alloc_impl {
137 use super::*;
138 use crate::canonical::nfc;
139 use crate::json::shapes::bounds::MAX_JSON_DEPTH;
140 use alloc::vec::Vec;
141 use prism::pipeline::ShapeViolation;
142
143 #[derive(Clone, PartialEq, Eq)]
151 pub struct JsonValue {
152 pub(crate) bytes: Vec<u8>,
153 }
154
155 impl core::fmt::Debug for JsonValue {
156 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
157 f.debug_struct("JsonValue")
158 .field("len", &self.bytes.len())
159 .finish_non_exhaustive()
160 }
161 }
162
163 impl JsonValue {
164 pub fn parse(raw: &[u8]) -> Result<Self, ShapeViolation> {
174 let mut value = Self { bytes: Vec::new() };
175 let mut p = Parser::new(raw);
176 p.skip_ws();
177 parse_value(&mut p, &mut value, 0)?;
178 p.skip_ws();
179 if !p.is_eof() {
180 return Err(INVALID_JSON_VIOLATION);
181 }
182 Ok(value)
183 }
184
185 #[must_use]
189 pub fn tagged_bytes(&self) -> &[u8] {
190 &self.bytes
191 }
192
193 fn push_byte(&mut self, b: u8) {
194 self.bytes.push(b);
195 }
196
197 fn push_u32_be(&mut self, v: u32) {
198 self.bytes.extend_from_slice(&v.to_be_bytes());
199 }
200
201 fn extend(&mut self, data: &[u8]) {
202 self.bytes.extend_from_slice(data);
203 }
204
205 fn patch_u32_be(&mut self, offset: usize, v: u32) {
206 self.bytes[offset..offset + 4].copy_from_slice(&v.to_be_bytes());
207 }
208 }
209
210 pub fn canonicalize(raw: &[u8]) -> Result<Vec<u8>, ShapeViolation> {
216 let value = JsonValue::parse(raw)?;
217 let mut out = Vec::new();
218 let mut pos = 0;
219 emit_value(value.tagged_bytes(), &mut pos, &mut out)?;
220 Ok(out)
221 }
222
223 struct Parser<'a> {
226 input: &'a [u8],
227 pos: usize,
228 }
229
230 impl<'a> Parser<'a> {
231 fn new(input: &'a [u8]) -> Self {
232 Self { input, pos: 0 }
233 }
234 fn is_eof(&self) -> bool {
235 self.pos >= self.input.len()
236 }
237 fn peek(&self) -> Result<u8, ShapeViolation> {
238 if self.is_eof() {
239 return Err(INVALID_JSON_VIOLATION);
240 }
241 Ok(self.input[self.pos])
242 }
243 fn bump(&mut self) -> Result<u8, ShapeViolation> {
244 let b = self.peek()?;
245 self.pos += 1;
246 Ok(b)
247 }
248 fn skip_ws(&mut self) {
249 while self.pos < self.input.len() {
250 match self.input[self.pos] {
251 b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1,
252 _ => break,
253 }
254 }
255 }
256 fn expect(&mut self, byte: u8) -> Result<(), ShapeViolation> {
257 if self.bump()? != byte {
258 return Err(INVALID_JSON_VIOLATION);
259 }
260 Ok(())
261 }
262 fn expect_lit(&mut self, lit: &[u8]) -> Result<(), ShapeViolation> {
263 if self.pos + lit.len() > self.input.len()
264 || &self.input[self.pos..self.pos + lit.len()] != lit
265 {
266 return Err(INVALID_JSON_VIOLATION);
267 }
268 self.pos += lit.len();
269 Ok(())
270 }
271 }
272
273 fn parse_value(
274 p: &mut Parser<'_>,
275 out: &mut JsonValue,
276 depth: usize,
277 ) -> Result<(), ShapeViolation> {
278 if depth > MAX_JSON_DEPTH {
279 return Err(DEPTH_BOUND_VIOLATION);
280 }
281 p.skip_ws();
282 match p.peek()? {
283 b'n' => {
284 p.expect_lit(b"null")?;
285 out.push_byte(TAG_NULL);
286 Ok(())
287 }
288 b't' => {
289 p.expect_lit(b"true")?;
290 out.push_byte(TAG_TRUE);
291 Ok(())
292 }
293 b'f' => {
294 p.expect_lit(b"false")?;
295 out.push_byte(TAG_FALSE);
296 Ok(())
297 }
298 b'"' => parse_string(p, out),
299 b'-' | b'0'..=b'9' => parse_number(p, out),
300 b'[' => parse_array(p, out, depth + 1),
301 b'{' => parse_object(p, out, depth + 1),
302 _ => Err(INVALID_JSON_VIOLATION),
303 }
304 }
305
306 fn parse_array(
307 p: &mut Parser<'_>,
308 out: &mut JsonValue,
309 depth: usize,
310 ) -> Result<(), ShapeViolation> {
311 p.expect(b'[')?;
312 out.push_byte(TAG_ARRAY);
313 let count_pos = out.bytes.len();
314 out.push_u32_be(0);
315 let mut count: u32 = 0;
316 p.skip_ws();
317 if p.peek()? == b']' {
318 p.pos += 1;
319 return Ok(());
320 }
321 loop {
322 parse_value(p, out, depth)?;
323 count += 1;
324 p.skip_ws();
325 match p.bump()? {
326 b',' => {
327 p.skip_ws();
328 continue;
329 }
330 b']' => break,
331 _ => return Err(INVALID_JSON_VIOLATION),
332 }
333 }
334 out.patch_u32_be(count_pos, count);
335 Ok(())
336 }
337
338 fn parse_object(
339 p: &mut Parser<'_>,
340 out: &mut JsonValue,
341 depth: usize,
342 ) -> Result<(), ShapeViolation> {
343 p.expect(b'{')?;
344 out.push_byte(TAG_OBJECT);
345 let count_pos = out.bytes.len();
346 out.push_u32_be(0);
347 let mut count: u32 = 0;
348 p.skip_ws();
349 if p.peek()? == b'}' {
350 p.pos += 1;
351 return Ok(());
352 }
353 loop {
354 p.skip_ws();
355 if p.peek()? != b'"' {
356 return Err(INVALID_JSON_VIOLATION);
357 }
358 let key = decode_string_into_nfc(p)?;
359 out.push_u32_be(key.len() as u32);
360 out.extend(&key);
361 p.skip_ws();
362 p.expect(b':')?;
363 p.skip_ws();
364 parse_value(p, out, depth)?;
365 count += 1;
366 p.skip_ws();
367 match p.bump()? {
368 b',' => continue,
369 b'}' => break,
370 _ => return Err(INVALID_JSON_VIOLATION),
371 }
372 }
373 out.patch_u32_be(count_pos, count);
374 Ok(())
375 }
376
377 fn parse_string(p: &mut Parser<'_>, out: &mut JsonValue) -> Result<(), ShapeViolation> {
378 let s = decode_string_into_nfc(p)?;
379 out.push_byte(TAG_STRING);
380 out.push_u32_be(s.len() as u32);
381 out.extend(&s);
382 Ok(())
383 }
384
385 fn decode_string_into_nfc(p: &mut Parser<'_>) -> Result<Vec<u8>, ShapeViolation> {
388 p.expect(b'"')?;
389 let mut stage1 = Vec::new();
390 loop {
391 if p.is_eof() {
392 return Err(INVALID_JSON_VIOLATION);
393 }
394 let b = p.input[p.pos];
395 match b {
396 b'"' => {
397 p.pos += 1;
398 break;
399 }
400 b'\\' => {
401 p.pos += 1;
402 let esc = p.bump()?;
403 match esc {
404 b'"' => stage1.push(b'"'),
405 b'\\' => stage1.push(b'\\'),
406 b'/' => stage1.push(b'/'),
407 b'b' => stage1.push(0x08),
408 b'f' => stage1.push(0x0C),
409 b'n' => stage1.push(0x0A),
410 b'r' => stage1.push(0x0D),
411 b't' => stage1.push(0x09),
412 b'u' => {
413 let cp = decode_u_escape(p)?;
414 let c = char::from_u32(cp).ok_or(INVALID_JSON_VIOLATION)?;
415 let mut tmp = [0u8; 4];
416 stage1.extend_from_slice(c.encode_utf8(&mut tmp).as_bytes());
417 }
418 _ => return Err(INVALID_JSON_VIOLATION),
419 }
420 }
421 0x00..=0x1F => return Err(INVALID_JSON_VIOLATION),
423 _ => {
424 stage1.push(b);
425 p.pos += 1;
426 }
427 }
428 }
429 normalize_nfc(&stage1)
430 }
431
432 fn normalize_nfc(stage1: &[u8]) -> Result<Vec<u8>, ShapeViolation> {
436 let mut cap = stage1.len().saturating_mul(3).max(64);
437 loop {
438 let mut buf = alloc::vec![0u8; cap];
439 match nfc::normalize_into(stage1, &mut buf) {
440 Ok(n) => {
441 buf.truncate(n);
442 return Ok(buf);
443 }
444 Err(nfc::NfcError::OutputOverflow) => {
445 cap = cap.saturating_mul(2);
446 }
447 Err(_) => return Err(INVALID_JSON_VIOLATION),
448 }
449 }
450 }
451
452 fn decode_u_escape(p: &mut Parser<'_>) -> Result<u32, ShapeViolation> {
453 let high = decode_hex4(p)?;
454 if (0xD800..=0xDBFF).contains(&high) {
455 if p.input.get(p.pos..p.pos + 2) != Some(b"\\u") {
456 return Err(INVALID_JSON_VIOLATION);
457 }
458 p.pos += 2;
459 let low = decode_hex4(p)?;
460 if !(0xDC00..=0xDFFF).contains(&low) {
461 return Err(INVALID_JSON_VIOLATION);
462 }
463 Ok(0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00))
464 } else if (0xDC00..=0xDFFF).contains(&high) {
465 Err(INVALID_JSON_VIOLATION)
466 } else {
467 Ok(high)
468 }
469 }
470
471 fn decode_hex4(p: &mut Parser<'_>) -> Result<u32, ShapeViolation> {
472 if p.pos + 4 > p.input.len() {
473 return Err(INVALID_JSON_VIOLATION);
474 }
475 let mut v: u32 = 0;
476 for _ in 0..4 {
477 let d = p.input[p.pos];
478 p.pos += 1;
479 let nibble = match d {
480 b'0'..=b'9' => (d - b'0') as u32,
481 b'a'..=b'f' => 10 + (d - b'a') as u32,
482 b'A'..=b'F' => 10 + (d - b'A') as u32,
483 _ => return Err(INVALID_JSON_VIOLATION),
484 };
485 v = (v << 4) | nibble;
486 }
487 Ok(v)
488 }
489
490 fn parse_number(p: &mut Parser<'_>, out: &mut JsonValue) -> Result<(), ShapeViolation> {
491 let start = p.pos;
492 let mut has_decimal = false;
493 let mut has_exponent = false;
494 if p.peek()? == b'-' {
495 p.pos += 1;
496 }
497 match p.peek()? {
498 b'0' => p.pos += 1,
499 b'1'..=b'9' => {
500 p.pos += 1;
501 while let Ok(b) = p.peek() {
502 if b.is_ascii_digit() {
503 p.pos += 1;
504 } else {
505 break;
506 }
507 }
508 }
509 _ => return Err(INVALID_JSON_VIOLATION),
510 }
511 if p.peek().ok() == Some(b'.') {
512 has_decimal = true;
513 p.pos += 1;
514 let frac_start = p.pos;
515 while let Ok(b) = p.peek() {
516 if b.is_ascii_digit() {
517 p.pos += 1;
518 } else {
519 break;
520 }
521 }
522 if p.pos == frac_start {
523 return Err(INVALID_JSON_VIOLATION);
524 }
525 }
526 if let Ok(b) = p.peek() {
527 if b == b'e' || b == b'E' {
528 has_exponent = true;
529 p.pos += 1;
530 if let Ok(s) = p.peek() {
531 if s == b'+' || s == b'-' {
532 p.pos += 1;
533 }
534 }
535 let exp_start = p.pos;
536 while let Ok(d) = p.peek() {
537 if d.is_ascii_digit() {
538 p.pos += 1;
539 } else {
540 break;
541 }
542 }
543 if p.pos == exp_start {
544 return Err(INVALID_JSON_VIOLATION);
545 }
546 }
547 }
548 let raw = &p.input[start..p.pos];
549 let canon = canonicalize_number(raw, has_decimal || has_exponent)?;
550 out.push_byte(TAG_NUMBER);
551 out.push_u32_be(canon.len() as u32);
552 out.extend(&canon);
553 Ok(())
554 }
555
556 fn canonicalize_number(raw: &[u8], is_float_syntax: bool) -> Result<Vec<u8>, ShapeViolation> {
562 let is_negative_zero = raw == b"-0";
563 if is_float_syntax || is_negative_zero {
564 let s = core::str::from_utf8(raw).map_err(|_| INVALID_JSON_VIOLATION)?;
565 let v: f64 = s.parse().map_err(|_| INVALID_JSON_VIOLATION)?;
566 let mut ryu_buf = ryu::Buffer::new();
567 Ok(ryu_buf.format(v).as_bytes().to_vec())
568 } else {
569 Ok(raw.to_vec())
570 }
571 }
572
573 fn read_byte(tagged: &[u8], pos: &mut usize) -> Result<u8, ShapeViolation> {
576 if *pos >= tagged.len() {
577 return Err(INVALID_JSON_VIOLATION);
578 }
579 let b = tagged[*pos];
580 *pos += 1;
581 Ok(b)
582 }
583
584 fn read_u32_be(tagged: &[u8], pos: &mut usize) -> Result<u32, ShapeViolation> {
585 if *pos + 4 > tagged.len() {
586 return Err(INVALID_JSON_VIOLATION);
587 }
588 let v = u32::from_be_bytes([
589 tagged[*pos],
590 tagged[*pos + 1],
591 tagged[*pos + 2],
592 tagged[*pos + 3],
593 ]);
594 *pos += 4;
595 Ok(v)
596 }
597
598 fn read_slice<'a>(
599 tagged: &'a [u8],
600 pos: &mut usize,
601 len: usize,
602 ) -> Result<&'a [u8], ShapeViolation> {
603 if *pos + len > tagged.len() {
604 return Err(INVALID_JSON_VIOLATION);
605 }
606 let s = &tagged[*pos..*pos + len];
607 *pos += len;
608 Ok(s)
609 }
610
611 fn emit_value(tagged: &[u8], pos: &mut usize, out: &mut Vec<u8>) -> Result<(), ShapeViolation> {
612 let tag = read_byte(tagged, pos)?;
613 match tag {
614 TAG_NULL => {
615 out.extend_from_slice(b"null");
616 Ok(())
617 }
618 TAG_FALSE => {
619 out.extend_from_slice(b"false");
620 Ok(())
621 }
622 TAG_TRUE => {
623 out.extend_from_slice(b"true");
624 Ok(())
625 }
626 TAG_NUMBER => {
627 let len = read_u32_be(tagged, pos)? as usize;
628 let bytes = read_slice(tagged, pos, len)?;
629 out.extend_from_slice(bytes);
630 Ok(())
631 }
632 TAG_STRING => {
633 let len = read_u32_be(tagged, pos)? as usize;
634 let bytes = read_slice(tagged, pos, len)?;
635 emit_json_string(bytes, out);
636 Ok(())
637 }
638 TAG_ARRAY => {
639 let count = read_u32_be(tagged, pos)? as usize;
640 out.push(b'[');
641 for i in 0..count {
642 if i > 0 {
643 out.push(b',');
644 }
645 emit_value(tagged, pos, out)?;
646 }
647 out.push(b']');
648 Ok(())
649 }
650 TAG_OBJECT => emit_object(tagged, pos, out),
651 _ => Err(INVALID_JSON_VIOLATION),
652 }
653 }
654
655 fn emit_object(
656 tagged: &[u8],
657 pos: &mut usize,
658 out: &mut Vec<u8>,
659 ) -> Result<(), ShapeViolation> {
660 let count = read_u32_be(tagged, pos)? as usize;
661 let mut entries: Vec<usize> = Vec::with_capacity(count);
665 for _ in 0..count {
666 entries.push(*pos);
667 let key_len = read_u32_be(tagged, pos)? as usize;
668 *pos += key_len;
669 if *pos > tagged.len() {
670 return Err(INVALID_JSON_VIOLATION);
671 }
672 skip_value(tagged, pos)?;
673 }
674 entries.sort_by(|&a, &b| entry_key(a, tagged).cmp(entry_key(b, tagged)));
675 out.push(b'{');
676 for (i, &entry_off) in entries.iter().enumerate() {
677 if i > 0 {
678 out.push(b',');
679 }
680 let mut p = entry_off;
681 let key_len = read_u32_be(tagged, &mut p)? as usize;
682 let key_bytes = read_slice(tagged, &mut p, key_len)?;
683 emit_json_string(key_bytes, out);
684 out.push(b':');
685 emit_value(tagged, &mut p, out)?;
686 }
687 out.push(b'}');
688 Ok(())
689 }
690
691 fn entry_key(off: usize, tagged: &[u8]) -> &[u8] {
692 if off + 4 > tagged.len() {
693 return &[];
694 }
695 let key_len = u32::from_be_bytes([
696 tagged[off],
697 tagged[off + 1],
698 tagged[off + 2],
699 tagged[off + 3],
700 ]) as usize;
701 let start = off + 4;
702 if start + key_len > tagged.len() {
703 return &[];
704 }
705 &tagged[start..start + key_len]
706 }
707
708 fn skip_value(tagged: &[u8], pos: &mut usize) -> Result<(), ShapeViolation> {
709 let tag = read_byte(tagged, pos)?;
710 match tag {
711 TAG_NULL | TAG_FALSE | TAG_TRUE => Ok(()),
712 TAG_NUMBER | TAG_STRING => {
713 let len = read_u32_be(tagged, pos)? as usize;
714 *pos += len;
715 if *pos > tagged.len() {
716 Err(INVALID_JSON_VIOLATION)
717 } else {
718 Ok(())
719 }
720 }
721 TAG_ARRAY => {
722 let count = read_u32_be(tagged, pos)? as usize;
723 for _ in 0..count {
724 skip_value(tagged, pos)?;
725 }
726 Ok(())
727 }
728 TAG_OBJECT => {
729 let count = read_u32_be(tagged, pos)? as usize;
730 for _ in 0..count {
731 let key_len = read_u32_be(tagged, pos)? as usize;
732 *pos += key_len;
733 if *pos > tagged.len() {
734 return Err(INVALID_JSON_VIOLATION);
735 }
736 skip_value(tagged, pos)?;
737 }
738 Ok(())
739 }
740 _ => Err(INVALID_JSON_VIOLATION),
741 }
742 }
743
744 fn emit_json_string(bytes: &[u8], out: &mut Vec<u8>) {
746 out.push(b'"');
747 for &b in bytes {
748 match b {
749 b'"' => out.extend_from_slice(b"\\\""),
750 b'\\' => out.extend_from_slice(b"\\\\"),
751 0x08 => out.extend_from_slice(b"\\b"),
752 0x09 => out.extend_from_slice(b"\\t"),
753 0x0A => out.extend_from_slice(b"\\n"),
754 0x0C => out.extend_from_slice(b"\\f"),
755 0x0D => out.extend_from_slice(b"\\r"),
756 0x00..=0x1F => {
757 out.extend_from_slice(b"\\u00");
758 out.push(nibble_hex(b >> 4));
759 out.push(nibble_hex(b & 0x0f));
760 }
761 _ => out.push(b),
762 }
763 }
764 out.push(b'"');
765 }
766
767 fn nibble_hex(n: u8) -> u8 {
768 match n {
769 0..=9 => b'0' + n,
770 10..=15 => b'a' + (n - 10),
771 _ => b'0',
772 }
773 }
774
775 #[derive(Clone, Copy)]
782 pub struct JsonValueRef<'a> {
783 tagged: &'a [u8],
784 offset: usize,
785 }
786
787 impl<'a> JsonValueRef<'a> {
788 pub fn root(value: &'a JsonValue) -> Self {
790 Self {
791 tagged: value.tagged_bytes(),
792 offset: 0,
793 }
794 }
795
796 pub fn tag(&self) -> u8 {
798 self.tagged[self.offset]
799 }
800 pub fn is_null(&self) -> bool {
801 self.tag() == TAG_NULL
802 }
803 pub fn is_bool(&self) -> bool {
804 matches!(self.tag(), TAG_FALSE | TAG_TRUE)
805 }
806 pub fn is_number(&self) -> bool {
807 self.tag() == TAG_NUMBER
808 }
809 pub fn is_string(&self) -> bool {
810 self.tag() == TAG_STRING
811 }
812 pub fn is_array(&self) -> bool {
813 self.tag() == TAG_ARRAY
814 }
815 pub fn is_object(&self) -> bool {
816 self.tag() == TAG_OBJECT
817 }
818
819 pub fn as_bool(&self) -> Option<bool> {
820 match self.tag() {
821 TAG_FALSE => Some(false),
822 TAG_TRUE => Some(true),
823 _ => None,
824 }
825 }
826
827 pub fn as_str(&self) -> Option<&'a [u8]> {
829 if !self.is_string() {
830 return None;
831 }
832 let mut p = self.offset + 1;
833 let len = read_u32_be(self.tagged, &mut p).ok()? as usize;
834 Some(&self.tagged[p..p + len])
835 }
836
837 pub fn as_number_str(&self) -> Option<&'a [u8]> {
839 if !self.is_number() {
840 return None;
841 }
842 let mut p = self.offset + 1;
843 let len = read_u32_be(self.tagged, &mut p).ok()? as usize;
844 Some(&self.tagged[p..p + len])
845 }
846
847 pub fn get(&self, key: &[u8]) -> Option<JsonValueRef<'a>> {
849 let mut iter = self.iter_object()?;
850 iter.find_map(|(k, v)| if k == key { Some(v) } else { None })
851 }
852
853 pub fn iter_object(&self) -> Option<ObjectIter<'a>> {
856 if !self.is_object() {
857 return None;
858 }
859 let mut p = self.offset + 1;
860 let count = read_u32_be(self.tagged, &mut p).ok()? as usize;
861 Some(ObjectIter {
862 tagged: self.tagged,
863 pos: p,
864 remaining: count,
865 })
866 }
867
868 pub fn iter_array(&self) -> Option<ArrayIter<'a>> {
870 if !self.is_array() {
871 return None;
872 }
873 let mut p = self.offset + 1;
874 let count = read_u32_be(self.tagged, &mut p).ok()? as usize;
875 Some(ArrayIter {
876 tagged: self.tagged,
877 pos: p,
878 remaining: count,
879 })
880 }
881 }
882
883 pub struct ObjectIter<'a> {
885 tagged: &'a [u8],
886 pos: usize,
887 remaining: usize,
888 }
889
890 impl<'a> Iterator for ObjectIter<'a> {
891 type Item = (&'a [u8], JsonValueRef<'a>);
892 fn next(&mut self) -> Option<Self::Item> {
893 if self.remaining == 0 {
894 return None;
895 }
896 let key_len = read_u32_be(self.tagged, &mut self.pos).ok()? as usize;
897 let key_end = self.pos + key_len;
898 let key = &self.tagged[self.pos..key_end];
899 self.pos = key_end;
900 let value_offset = self.pos;
901 self.pos = skip_to_end(self.tagged, self.pos).ok()?;
902 self.remaining -= 1;
903 Some((
904 key,
905 JsonValueRef {
906 tagged: self.tagged,
907 offset: value_offset,
908 },
909 ))
910 }
911 }
912
913 pub struct ArrayIter<'a> {
915 tagged: &'a [u8],
916 pos: usize,
917 remaining: usize,
918 }
919
920 impl<'a> Iterator for ArrayIter<'a> {
921 type Item = JsonValueRef<'a>;
922 fn next(&mut self) -> Option<Self::Item> {
923 if self.remaining == 0 {
924 return None;
925 }
926 let value_offset = self.pos;
927 self.pos = skip_to_end(self.tagged, self.pos).ok()?;
928 self.remaining -= 1;
929 Some(JsonValueRef {
930 tagged: self.tagged,
931 offset: value_offset,
932 })
933 }
934 }
935
936 fn skip_to_end(tagged: &[u8], pos: usize) -> Result<usize, ShapeViolation> {
937 let mut p = pos;
938 let tag = read_byte(tagged, &mut p)?;
939 match tag {
940 TAG_NULL | TAG_FALSE | TAG_TRUE => Ok(p),
941 TAG_NUMBER | TAG_STRING => {
942 let len = read_u32_be(tagged, &mut p)? as usize;
943 Ok(p + len)
944 }
945 TAG_ARRAY => {
946 let count = read_u32_be(tagged, &mut p)? as usize;
947 for _ in 0..count {
948 p = skip_to_end(tagged, p)?;
949 }
950 Ok(p)
951 }
952 TAG_OBJECT => {
953 let count = read_u32_be(tagged, &mut p)? as usize;
954 for _ in 0..count {
955 let key_len = read_u32_be(tagged, &mut p)? as usize;
956 p += key_len;
957 p = skip_to_end(tagged, p)?;
958 }
959 Ok(p)
960 }
961 _ => Err(INVALID_JSON_VIOLATION),
962 }
963 }
964
965 #[cfg(test)]
966 mod tests {
967 use super::*;
968
969 #[test]
970 fn parses_simple_object() {
971 let v = JsonValue::parse(br#"{"foo":"bar"}"#).expect("valid");
972 assert_eq!(v.bytes[0], TAG_OBJECT);
973 }
974
975 #[test]
976 fn rejects_invalid_json() {
977 let err = JsonValue::parse(b"not json").expect_err("must reject");
978 assert_eq!(err.shape_iri, INVALID_JSON_VIOLATION.shape_iri);
979 }
980
981 #[test]
982 fn rejects_overdeep_recursion() {
983 use alloc::string::String;
984 let mut s = String::new();
985 for _ in 0..(MAX_JSON_DEPTH + 2) {
986 s.push('[');
987 }
988 for _ in 0..(MAX_JSON_DEPTH + 2) {
989 s.push(']');
990 }
991 let err = JsonValue::parse(s.as_bytes()).expect_err("must reject");
992 assert_eq!(err.constraint_iri, DEPTH_BOUND_VIOLATION.constraint_iri);
993 }
994
995 #[test]
996 fn accepts_unbounded_string_width() {
997 use alloc::format;
998 use alloc::string::String;
999 let big: String = "a".repeat(200_000);
1000 let raw = format!("\"{big}\"");
1001 let canon = canonicalize(raw.as_bytes()).expect("unbounded string admitted");
1002 assert_eq!(canon.len(), big.len() + 2);
1003 }
1004
1005 const CANONICAL_FIXTURES: &[(&[u8], &[u8])] = &[
1006 (br#"{"foo":"bar"}"#, br#"{"foo":"bar"}"#),
1007 (br#"{"b": 1, "a": 2}"#, br#"{"a":2,"b":1}"#),
1008 (
1009 br#"{"nested": {"deep": {"value": "found"}}}"#,
1010 br#"{"nested":{"deep":{"value":"found"}}}"#,
1011 ),
1012 (
1013 br#"{"int": 42, "bool": true, "null_val": null}"#,
1014 br#"{"bool":true,"int":42,"null_val":null}"#,
1015 ),
1016 (b"[1, 2, 3]", b"[1,2,3]"),
1017 (br#"["a", "b", "c"]"#, br#"["a","b","c"]"#),
1018 ];
1019
1020 #[test]
1021 fn canonicalizer_matches_reference_for_inline_fixtures() {
1022 for (raw, expected) in CANONICAL_FIXTURES {
1023 let canon = canonicalize(raw).expect("valid");
1024 assert_eq!(canon, *expected, "raw={raw:?}");
1025 }
1026 }
1027
1028 #[test]
1029 fn canonicalizer_collapses_unicode_decomposed_to_composed() {
1030 let decomposed = "{\"name\": \"cafe\u{0301}\"}".as_bytes();
1031 let composed = "{\"name\":\"caf\u{00E9}\"}".as_bytes();
1032 assert_eq!(
1033 canonicalize(decomposed).expect("valid"),
1034 canonicalize(composed).expect("valid")
1035 );
1036 }
1037
1038 #[test]
1039 fn canonicalize_is_idempotent_on_its_own_output() {
1040 for (raw, _expected) in CANONICAL_FIXTURES {
1041 let once = canonicalize(raw).expect("valid");
1042 let twice = canonicalize(&once).expect("re-canonicalises");
1043 assert_eq!(once, twice, "idempotence broken for {raw:?}");
1044 }
1045 }
1046 }
1047}