Skip to main content

vr_jcs/
lib.rs

1//! # vr-jcs
2//!
3//! RFC 8785 JSON Canonicalization Scheme (JCS) for Rust.
4//!
5//! Produces canonical JSON suitable for deterministic digest computation,
6//! content hashing, and stable serialization boundaries. Implements the
7//! RFC 8785 rules that materially affect wire compatibility:
8//! - UTF-16 code-unit sorting for object property names
9//! - ECMAScript-compatible primitive serialization
10//! - UTF-8 output without insignificant whitespace
11//! - duplicate-property rejection on raw JSON parse paths
12//! - I-JSON string / number validation
13//!
14//! ## API
15//!
16//! ### Strict path (for untrusted JSON)
17//!
18//! - [`to_canon_bytes_from_slice`] — Parse untrusted JSON, apply strict admission checks, emit canonical bytes
19//! - [`to_canon_string_from_str`] — Parse untrusted JSON string, apply strict admission checks, emit canonical string
20//!
21//! ### Typed path (caller-controlled construction only, deprecated)
22//!
23//! - [`to_canon_bytes`] — Serialize any `Serialize` type to canonical JSON bytes
24//! - [`to_canon_string`] — Serialize any `Serialize` type to a canonical JSON string
25//!
26//! ### In-place
27//!
28//! - [`canonicalize`] — Sort object keys recursively in a `serde_json::Value`
29//!
30//! ### Canonical digest
31//!
32//! Canonicalization is a schema decision; digest algorithm choice (BLAKE3 vs.
33//! keyed BLAKE3 vs. domain-separated BLAKE3 vs. SHA-256 vs. …) is a separate
34//! cryptographic / governance decision. The digest surface reflects that split:
35//!
36//! **Strategy-bearing (primary) path** — for any site whose digest algorithm
37//! is or may become a policy variable:
38//!
39//! - [`DigestAlgorithm`] — the algorithm enum.
40//! - [`DigestStrategy`] — an algorithm plus any future policy knobs.
41//! - [`CanonicalDigest`] — typed output that remembers which algorithm produced it.
42//! - [`to_canon_digest_with`] — canonicalize `value`, digest under `strategy`.
43//!
44//! **BLAKE3 fixed-policy convenience** — for sites where receipt policy has
45//! explicitly frozen the algorithm to plain BLAKE3:
46//!
47//! - [`to_canon_blake3_digest`] — `&Value`  → `[u8; 32]`.
48//! - [`to_canon_blake3_digest_from_slice`] — strict-parse `&[u8]` → `[u8; 32]`.
49//!
50//! These convenience wrappers are equivalent to calling
51//! [`to_canon_digest_with`] with [`DigestStrategy::blake3_untagged`] and
52//! extracting `bytes`.
53//!
54//! The lexical invariant is: canonicalization and digest must travel together
55//! through one call. Receipt-bound and constitutional code paths MUST use the
56//! strategy-bearing or fixed-BLAKE3 wrappers instead of pairing
57//! `to_canon_bytes_*` with `blake3::hash` manually.
58//!
59//! ## Usage
60//!
61//! ```
62//! # fn main() -> Result<(), vr_jcs::JcsError> {
63//! let json = vr_jcs::to_canon_string_from_str(r#"{"z_field":1,"a_field":2}"#)?;
64//! assert_eq!(json, r#"{"a_field":2,"z_field":1}"#);
65//! # Ok(())
66//! # }
67//! ```
68
69use std::cmp::Ordering;
70use std::collections::BTreeSet;
71use std::fmt;
72
73use serde::de::{self, DeserializeSeed, Error as DeError, MapAccess, SeqAccess, Visitor};
74use serde::{Deserializer, Serialize};
75use serde_json::{Number, Value};
76
/// Maximum permitted nesting depth for JSON structures (128).
///
/// Depth is counted from 0 at the root value and incremented by one for
/// each array element or object member that is descended into. The
/// canonicalizer, the emitter, and the strict parser each reject a value
/// once its depth counter is strictly greater than this constant.
pub const MAX_NESTING_DEPTH: usize = 128;
79
80mod error;
81pub use error::{JcsError, JcsErrorInfo};
82
83// ── Public API ─────────────────────────────────────────────────────
84
85/// Serialize any `Serialize` type to canonical JSON bytes.
86///
87/// The typed `Serialize` path is not authoritative for untrusted raw JSON
88/// because it does not control parse-time object-member admission. For
89/// untrusted input, use [`to_canon_bytes_from_slice`] instead.
90///
91/// # Errors
92///
93/// Returns:
94/// - [`JcsError::Json`] if serialization to JSON fails
95/// - [`JcsError::InvalidString`] if a string contains an I-JSON forbidden code point
96/// - [`JcsError::InvalidNumber`] if a number is not interoperable under JCS
97/// - [`JcsError::NestingDepthExceeded`] if the value exceeds [`MAX_NESTING_DEPTH`]
98#[deprecated(
99    since = "0.3.0",
100    note = "use to_canon_bytes_from_slice for untrusted input; see PUBLIC_SURFACE.md"
101)]
102pub fn to_canon_bytes<T: Serialize>(value: &T) -> Result<Vec<u8>, JcsError> {
103    let value = serde_json::to_value(value)?;
104    to_canon_bytes_value(&value)
105}
106
107/// Serialize any `Serialize` type to a canonical JSON string.
108///
109/// # Errors
110///
111/// Returns:
112/// - [`JcsError::Json`] if serialization to JSON fails
113/// - [`JcsError::InvalidString`] if a string contains an I-JSON forbidden code point
114/// - [`JcsError::InvalidNumber`] if a number is not interoperable under JCS
115/// - [`JcsError::NestingDepthExceeded`] if the value exceeds [`MAX_NESTING_DEPTH`]
116#[deprecated(
117    since = "0.3.0",
118    note = "use to_canon_string_from_str for untrusted input; see PUBLIC_SURFACE.md"
119)]
120pub fn to_canon_string<T: Serialize>(value: &T) -> Result<String, JcsError> {
121    let value = serde_json::to_value(value)?;
122    let bytes = to_canon_bytes_value(&value)?;
123    String::from_utf8(bytes).map_err(|error| {
124        JcsError::InvalidString(format!(
125            "canonical JSON output was not valid UTF-8: {error}"
126        ))
127    })
128}
129
130/// Parse untrusted JSON, apply strict admission checks, and emit canonical
131/// RFC 8785 bytes.
132///
133/// Rejects duplicate property names, validates I-JSON string and number
134/// constraints, and enforces [`MAX_NESTING_DEPTH`]. Accepts any valid JSON
135/// formatting (including pretty-printed input) and canonicalizes it.
136///
137/// # Errors
138///
139/// Returns [`JcsError::Json`] for malformed JSON or duplicate property names,
140/// [`JcsError::InvalidString`] or [`JcsError::InvalidNumber`] for I-JSON
141/// violations, and [`JcsError::NestingDepthExceeded`] for depth limit breach.
142pub fn to_canon_bytes_from_slice(json: &[u8]) -> Result<Vec<u8>, JcsError> {
143    let value = parse_json_value_no_duplicates(json)?;
144    to_canon_bytes_value(&value)
145}
146
147/// Parse untrusted JSON text, apply strict admission checks, and emit a
148/// canonical RFC 8785 string.
149///
150/// # Errors
151///
152/// Returns the same errors as [`to_canon_bytes_from_slice`].
153pub fn to_canon_string_from_str(json: &str) -> Result<String, JcsError> {
154    let bytes = to_canon_bytes_from_slice(json.as_bytes())?;
155    String::from_utf8(bytes).map_err(|error| {
156        JcsError::InvalidString(format!(
157            "canonical JSON output was not valid UTF-8: {error}"
158        ))
159    })
160}
161
/// Owned buffer of canonical JCS output.
///
/// Only this crate can construct one: callers receive a
/// [`CanonicalBytes`] by going through [`canonical_bytes_from_slice`]
/// (or the wrappers in `vertrule-core::determinism`). Digest, signature,
/// and receipt APIs can therefore demand "bytes produced by JCS" in their
/// signatures instead of accepting an arbitrary `&[u8]`. There is
/// deliberately no `AsRef<[u8]>` or `Deref` impl; the only borrow of the
/// raw bytes is the explicit [`Self::as_slice`] call, which keeps every
/// escape greppable.
///
/// `Debug` prints the byte length only, never the contents, so that
/// logging a value cannot leak receipt contents by accident.
#[derive(Clone, Eq, PartialEq)]
pub struct CanonicalBytes(Vec<u8>);

impl CanonicalBytes {
    /// Wrap bytes that have already been canonicalized. Crate-private so
    /// that external code must go through [`canonical_bytes_from_slice`]
    /// (or the re-export in
    /// `vertrule-core::determinism::to_canon_bytes_wrapped`).
    pub(crate) const fn from_jcs(bytes: Vec<u8>) -> Self {
        Self(bytes)
    }

    /// Number of canonical bytes held.
    #[must_use]
    pub fn len(&self) -> usize {
        let Self(buffer) = self;
        buffer.len()
    }

    /// Whether the buffer holds zero bytes (only possible for an empty
    /// input in degenerate paths; not reachable from the primary API).
    #[must_use]
    pub fn is_empty(&self) -> bool {
        let Self(buffer) = self;
        buffer.is_empty()
    }

    /// Explicit, greppable borrow of the raw bytes — the point where
    /// canonical-bytes discipline is dropped (e.g. when feeding wire bytes
    /// to `blake3::hash`).
    #[must_use]
    pub fn as_slice(&self) -> &[u8] {
        let Self(buffer) = self;
        buffer.as_slice()
    }

    /// Give up the wrapper and take ownership of the byte buffer. Meant
    /// for wire boundaries where the bytes are handed off (file write,
    /// network send); this is an explicit call, not an implicit coercion.
    #[must_use]
    pub fn into_vec(self) -> Vec<u8> {
        let Self(buffer) = self;
        buffer
    }
}

impl fmt::Debug for CanonicalBytes {
    /// Render the type name and byte length only; contents are omitted.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let byte_count = self.0.len();
        let mut rendering = f.debug_struct("CanonicalBytes");
        rendering.field("len", &byte_count);
        rendering.finish_non_exhaustive()
    }
}
224
225/// Parse untrusted JSON, apply strict admission checks, and return the
226/// canonical RFC 8785 bytes inside a [`CanonicalBytes`] wrapper.
227///
228/// Prefer this over [`to_canon_bytes_from_slice`] for any path that will
229/// feed the bytes into a digest, signature, or receipt primitive — the
230/// wrapper makes "came out of JCS" a type-level fact.
231///
232/// # Errors
233///
234/// Returns the same errors as [`to_canon_bytes_from_slice`].
235pub fn canonical_bytes_from_slice(json: &[u8]) -> Result<CanonicalBytes, JcsError> {
236    to_canon_bytes_from_slice(json).map(CanonicalBytes::from_jcs)
237}
238
/// Digest algorithm variant.
///
/// Explicit enum so the algorithm choice is a named governance decision
/// rather than an implicit default. [`Blake3Untagged`](Self::Blake3Untagged)
/// is the plain `blake3::hash` pattern; keyed and domain-separated variants
/// carry their differentiating input so two uses with different keys or
/// contexts cannot accidentally collide on a digest.
///
/// [`Sha256`](Self::Sha256) is declared but not yet wired in this crate;
/// requesting it returns [`JcsError::UnsupportedAlgorithm`]. The variant
/// exists so receipt schemas and policy packs can reference it without
/// waiting for the implementation.
///
/// `Debug` is implemented by hand rather than derived: this enum is embedded
/// in [`DigestStrategy`] and [`CanonicalDigest`], which are routinely logged,
/// and a derived impl would print the 32-byte key of
/// [`Blake3Keyed`](Self::Blake3Keyed) verbatim. The manual impl redacts the
/// key and otherwise matches the derived layout.
#[derive(Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum DigestAlgorithm {
    /// BLAKE3 plain `hash(canonical_bytes)`.
    Blake3Untagged,
    /// BLAKE3 keyed with a 32-byte key: `keyed_hash(key, canonical_bytes)`.
    Blake3Keyed {
        /// Domain key. Value is load-bearing for the digest; choose it once
        /// per receipt domain and never reuse across unrelated domains.
        key: [u8; 32],
    },
    /// BLAKE3 domain-separated via `derive_key(context, canonical_bytes)`.
    /// The context string is the domain identifier (must be a compile-time
    /// constant in user code — BLAKE3 semantics require it to be globally
    /// unique per domain).
    Blake3DomainSeparated {
        /// Domain context string.
        context: String,
    },
    /// SHA-256 over canonical bytes. Declared in the API but not yet wired.
    Sha256,
}

impl fmt::Debug for DigestAlgorithm {
    /// Format the variant without ever printing key material.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Blake3Untagged => f.write_str("Blake3Untagged"),
            // The key is intentionally omitted; `..` signals the hidden field.
            Self::Blake3Keyed { key: _ } => f.debug_struct("Blake3Keyed").finish_non_exhaustive(),
            Self::Blake3DomainSeparated { context } => f
                .debug_struct("Blake3DomainSeparated")
                .field("context", context)
                .finish(),
            Self::Sha256 => f.write_str("Sha256"),
        }
    }
}

impl DigestAlgorithm {
    /// Short stable name suitable for use in receipt schemas and logs.
    ///
    /// The name identifies the variant only; it does not encode the key or
    /// the context string.
    #[must_use]
    pub const fn name(&self) -> &'static str {
        match self {
            Self::Blake3Untagged => "blake3-untagged",
            Self::Blake3Keyed { .. } => "blake3-keyed",
            Self::Blake3DomainSeparated { .. } => "blake3-domain-separated",
            Self::Sha256 => "sha256",
        }
    }
}
286
287/// A digest strategy bundles the algorithm with any future policy knobs
288/// (output truncation, pre-hash prefix, etc.). Today it's a thin newtype;
289/// the wrapper exists so extensions don't churn call sites.
290#[derive(Clone, Debug, PartialEq, Eq)]
291pub struct DigestStrategy {
292    /// The algorithm to apply.
293    pub algorithm: DigestAlgorithm,
294}
295
296impl DigestStrategy {
297    /// Plain untagged BLAKE3 over canonical bytes.
298    #[must_use]
299    pub const fn blake3_untagged() -> Self {
300        Self {
301            algorithm: DigestAlgorithm::Blake3Untagged,
302        }
303    }
304
305    /// Keyed BLAKE3 over canonical bytes.
306    #[must_use]
307    pub const fn blake3_keyed(key: [u8; 32]) -> Self {
308        Self {
309            algorithm: DigestAlgorithm::Blake3Keyed { key },
310        }
311    }
312
313    /// Domain-separated BLAKE3 over canonical bytes.
314    #[must_use]
315    pub fn blake3_domain_separated(context: impl Into<String>) -> Self {
316        Self {
317            algorithm: DigestAlgorithm::Blake3DomainSeparated {
318                context: context.into(),
319            },
320        }
321    }
322
323    /// SHA-256 over canonical bytes. Presently returns
324    /// [`JcsError::UnsupportedAlgorithm`] at call time; the constructor is
325    /// provided so policy code can reference it today.
326    #[must_use]
327    pub const fn sha256() -> Self {
328        Self {
329            algorithm: DigestAlgorithm::Sha256,
330        }
331    }
332}
333
/// Typed output of a canonical digest computation.
///
/// Carries the algorithm that produced `bytes` so downstream consumers
/// (receipt envelopes, audit logs) can record the algorithm without
/// out-of-band convention.
///
/// Both fields are public; the struct is a plain record and performs no
/// validation that `bytes` matches `algorithm`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CanonicalDigest {
    /// The algorithm used. This is a clone of the strategy's algorithm,
    /// including any key or context it carries.
    pub algorithm: DigestAlgorithm,
    /// Raw digest bytes. Length depends on the algorithm (32 for BLAKE3
    /// variants, 32 for SHA-256 once wired).
    pub bytes: Vec<u8>,
}
347
348/// Canonicalize a trusted `serde_json::Value` and digest the canonical bytes
349/// under the given strategy.
350///
351/// The value is assumed to come from caller-controlled construction. For
352/// untrusted input, use [`to_canon_bytes_from_slice`] first (strict
353/// admission) and then pass the resulting `Value` back through this function
354/// — or use a strict-parse sibling when one exists for the target strategy.
355///
356/// # Errors
357///
358/// Returns:
359/// - [`JcsError::Json`] if the value cannot be canonicalized
360/// - [`JcsError::InvalidString`] for I-JSON forbidden code points
361/// - [`JcsError::InvalidNumber`] for non-interoperable numbers
362/// - [`JcsError::NestingDepthExceeded`] for values beyond [`MAX_NESTING_DEPTH`]
363/// - [`JcsError::UnsupportedAlgorithm`] if the strategy names an algorithm
364///   not wired in this build (e.g. SHA-256 today)
365pub fn to_canon_digest_with(
366    value: &Value,
367    strategy: &DigestStrategy,
368) -> Result<CanonicalDigest, JcsError> {
369    let bytes = to_canon_bytes_value(value)?;
370    let digest_bytes = match &strategy.algorithm {
371        DigestAlgorithm::Blake3Untagged => blake3::hash(&bytes).as_bytes().to_vec(),
372        DigestAlgorithm::Blake3Keyed { key } => blake3::keyed_hash(key, &bytes).as_bytes().to_vec(),
373        DigestAlgorithm::Blake3DomainSeparated { context } => {
374            // BLAKE3 derive_key is the standard domain-separated digest:
375            // context is the domain identifier, key_material is the data.
376            blake3::derive_key(context, &bytes).to_vec()
377        }
378        DigestAlgorithm::Sha256 => {
379            return Err(JcsError::UnsupportedAlgorithm(
380                "SHA-256 over canonical bytes is declared in the API but not \
381                 wired in this build; open a follow-up to add the sha2 dep"
382                    .to_string(),
383            ));
384        }
385    };
386    Ok(CanonicalDigest {
387        algorithm: strategy.algorithm.clone(),
388        bytes: digest_bytes,
389    })
390}
391
392/// BLAKE3 fixed-policy convenience. Canonicalize `value` and return
393/// `blake3::hash(canonical_bytes)` as a 32-byte array.
394///
395/// Use this only at sites where the receipt convention explicitly fixes the
396/// algorithm to plain BLAKE3. For anything else, use [`to_canon_digest_with`]
397/// with an explicit [`DigestStrategy`].
398///
399/// # Errors
400///
401/// Same as [`to_canon_digest_with`] minus [`JcsError::UnsupportedAlgorithm`].
402pub fn to_canon_blake3_digest(value: &Value) -> Result<[u8; 32], JcsError> {
403    let bytes = to_canon_bytes_value(value)?;
404    Ok(*blake3::hash(&bytes).as_bytes())
405}
406
407/// Strict-parse sibling of [`to_canon_blake3_digest`] for untrusted JSON
408/// bytes.
409///
410/// # Errors
411///
412/// Returns [`JcsError::Json`] for malformed JSON or duplicate property names,
413/// [`JcsError::InvalidString`] or [`JcsError::InvalidNumber`] for I-JSON
414/// violations, and [`JcsError::NestingDepthExceeded`] for depth limit breach.
415pub fn to_canon_blake3_digest_from_slice(json: &[u8]) -> Result<[u8; 32], JcsError> {
416    let bytes = to_canon_bytes_from_slice(json)?;
417    Ok(*blake3::hash(&bytes).as_bytes())
418}
419
420/// Recursively sort all object keys in a JSON value for canonical representation.
421///
422/// This function modifies the value in place, sorting all object keys
423/// by UTF-16 code units (RFC 8785) and recursively processing nested
424/// structures. Array element order is preserved.
425///
426/// For digest computation, prefer [`to_canon_bytes_from_slice`] which
427/// handles the full strict parse + canonical emit pipeline.
428///
429/// # Errors
430///
431/// Returns [`JcsError::NestingDepthExceeded`] if the value exceeds
432/// [`MAX_NESTING_DEPTH`].
433pub fn canonicalize(v: &mut Value) -> Result<(), JcsError> {
434    canonicalize_depth(v, 0)
435}
436
437fn canonicalize_depth(v: &mut Value, depth: usize) -> Result<(), JcsError> {
438    if depth > MAX_NESTING_DEPTH {
439        return Err(JcsError::NestingDepthExceeded);
440    }
441    match v {
442        Value::Object(map) => {
443            let mut entries: Vec<(String, Value)> = std::mem::take(map).into_iter().collect();
444            entries.sort_by(|(a, _), (b, _)| cmp_utf16(a, b));
445            for (key, mut value) in entries {
446                canonicalize_depth(&mut value, depth + 1)?;
447                map.insert(key, value);
448            }
449        }
450        Value::Array(arr) => {
451            for x in arr {
452                canonicalize_depth(x, depth + 1)?;
453            }
454        }
455        _ => {}
456    }
457    Ok(())
458}
459
460// ── Sibling-crate helpers ─────────────────────────────────────────
461//
462// `#[doc(hidden)]` and not part of the stable API. Subject to change
463// or removal without semver bump.
464
465/// Deserialize a JSON value while rejecting duplicate property names.
466///
467/// Used by `vertrule-schemas` for ingestion validation.
468///
469/// # Errors
470///
471/// Returns an error if the input contains duplicate property names,
472/// forbidden noncharacters, or is otherwise invalid JSON.
473#[doc(hidden)]
474pub fn deserialize_json_value_no_duplicates<'de, D>(deserializer: D) -> Result<Value, D::Error>
475where
476    D: Deserializer<'de>,
477{
478    NoDuplicateValueSeed { depth: 0 }.deserialize(deserializer)
479}
480
481/// Validate that a string contains no I-JSON forbidden noncharacters.
482///
483/// # Errors
484///
485/// Returns a description of the violation if the string contains a
486/// forbidden Unicode noncharacter (U+FDD0..U+FDEF, U+xFFFE, U+xFFFF).
487#[doc(hidden)]
488pub fn validate_string_contents(value: &str, context: &str) -> Result<(), String> {
489    if let Some(ch) = value.chars().find(|&ch| is_noncharacter(ch)) {
490        return Err(format!(
491            "{context} contains the forbidden noncharacter U+{:04X}",
492            ch as u32
493        ));
494    }
495    Ok(())
496}
497
/// Report whether `value` lies in the I-JSON safe integer range
/// `[-2^53+1, 2^53-1]`.
#[doc(hidden)]
#[must_use]
pub fn is_safe_integer(value: i64) -> bool {
    // The range is symmetric around zero, so comparing magnitudes suffices.
    value.unsigned_abs() <= MAX_SAFE_INTEGER.unsigned_abs()
}

// ── Internal implementation ────────────────────────────────────────

/// Largest I-JSON safe integer, `2^53 - 1`.
const MAX_SAFE_INTEGER: i64 = (1 << 53) - 1;
508
509fn to_canon_bytes_value(value: &Value) -> Result<Vec<u8>, JcsError> {
510    let mut out = Vec::new();
511    emit_value(&mut out, value, 0)?;
512    Ok(out)
513}
514
515fn emit_value(out: &mut Vec<u8>, value: &Value, depth: usize) -> Result<(), JcsError> {
516    if depth > MAX_NESTING_DEPTH {
517        return Err(JcsError::NestingDepthExceeded);
518    }
519    match value {
520        Value::Null => out.extend_from_slice(b"null"),
521        Value::Bool(boolean) => {
522            if *boolean {
523                out.extend_from_slice(b"true");
524            } else {
525                out.extend_from_slice(b"false");
526            }
527        }
528        Value::Number(number) => emit_number(out, number)?,
529        Value::String(string) => emit_string(out, string, "string value")?,
530        Value::Array(array) => {
531            out.push(b'[');
532            for (index, item) in array.iter().enumerate() {
533                if index > 0 {
534                    out.push(b',');
535                }
536                emit_value(out, item, depth + 1)?;
537            }
538            out.push(b']');
539        }
540        Value::Object(object) => {
541            out.push(b'{');
542            let mut entries: Vec<_> = object.iter().collect();
543            entries.sort_by(|(left, _), (right, _)| cmp_utf16(left, right));
544
545            for (index, (key, item)) in entries.iter().enumerate() {
546                if index > 0 {
547                    out.push(b',');
548                }
549                emit_string(out, key, "object property name")?;
550                out.push(b':');
551                emit_value(out, item, depth + 1)?;
552            }
553            out.push(b'}');
554        }
555    }
556    Ok(())
557}
558
559fn emit_number(out: &mut Vec<u8>, number: &Number) -> Result<(), JcsError> {
560    if let Some(value) = number.as_i64() {
561        let s = value.to_string();
562        ensure_exact_binary64_integer(value.unsigned_abs(), &s)?;
563        out.extend_from_slice(s.as_bytes());
564        return Ok(());
565    }
566
567    if let Some(value) = number.as_u64() {
568        let s = value.to_string();
569        ensure_exact_binary64_integer(value, &s)?;
570        out.extend_from_slice(s.as_bytes());
571        return Ok(());
572    }
573
574    if let Some(value) = number.as_f64() {
575        if !value.is_finite() {
576            return Err(JcsError::InvalidNumber(
577                "encountered a non-finite floating-point number".to_string(),
578            ));
579        }
580
581        let rendered = format_ecmascript_number(value)?;
582        out.extend_from_slice(rendered.as_bytes());
583        return Ok(());
584    }
585
586    Err(JcsError::InvalidNumber(
587        "unsupported JSON number representation".to_string(),
588    ))
589}
590
591fn emit_string(out: &mut Vec<u8>, value: &str, context: &str) -> Result<(), JcsError> {
592    validate_string_contents(value, context).map_err(JcsError::InvalidString)?;
593
594    out.push(b'"');
595    for ch in value.chars() {
596        match ch {
597            '"' => out.extend_from_slice(br#"\""#),
598            '\\' => out.extend_from_slice(br"\\"),
599            '\u{0008}' => out.extend_from_slice(br"\b"),
600            '\u{0009}' => out.extend_from_slice(br"\t"),
601            '\u{000A}' => out.extend_from_slice(br"\n"),
602            '\u{000C}' => out.extend_from_slice(br"\f"),
603            '\u{000D}' => out.extend_from_slice(br"\r"),
604            '\u{0000}'..='\u{001F}' => {
605                let escaped = format!(r"\u{:04x}", ch as u32);
606                out.extend_from_slice(escaped.as_bytes());
607            }
608            _ => {
609                let mut buf = [0u8; 4];
610                let encoded = ch.encode_utf8(&mut buf);
611                out.extend_from_slice(encoded.as_bytes());
612            }
613        }
614    }
615    out.push(b'"');
616
617    Ok(())
618}
619
620fn ensure_exact_binary64_integer(value: u64, original: &str) -> Result<(), JcsError> {
621    if is_exact_binary64_integer(value) {
622        Ok(())
623    } else {
624        Err(JcsError::InvalidNumber(format!(
625            "integer {original} is not exactly representable as an IEEE 754 double; encode it as a string"
626        )))
627    }
628}
629
/// Report whether the unsigned integer `value` fits a double's 53-bit
/// significand without rounding.
///
/// That holds exactly when the distance from the highest set bit to the
/// lowest set bit, inclusive, is at most 53 bits. Zero is always exact.
const fn is_exact_binary64_integer(value: u64) -> bool {
    if value == 0 {
        return true;
    }
    // Bits between (and including) the most and least significant set bit.
    let significant_bits = u64::BITS - value.leading_zeros() - value.trailing_zeros();
    significant_bits <= 53
}
637
638fn format_ecmascript_number(value: f64) -> Result<String, JcsError> {
639    if value == 0.0 {
640        return Ok("0".to_string());
641    }
642
643    let mut buffer = zmij::Buffer::new();
644    let shortest = buffer.format_finite(value);
645    let (negative, body) = if let Some(stripped) = shortest.strip_prefix('-') {
646        (true, stripped)
647    } else {
648        (false, shortest)
649    };
650
651    let (digits, exponent) = parse_shortest_decimal(body)?;
652    let rendered = render_ecmascript_number(&digits, exponent)?;
653
654    if negative {
655        Ok(format!("-{rendered}"))
656    } else {
657        Ok(rendered)
658    }
659}
660
661fn parse_shortest_decimal(body: &str) -> Result<(String, i32), JcsError> {
662    if let Some((mantissa, exponent)) = body.split_once('e') {
663        let digits: String = mantissa.chars().filter(|&ch| ch != '.').collect();
664        let exponent = exponent.parse::<i32>().map_err(|error| {
665            JcsError::InvalidNumber(format!(
666                "failed to parse formatter exponent {exponent:?}: {error}"
667            ))
668        })?;
669        return Ok((digits, exponent + 1));
670    }
671
672    if let Some((integer, fractional)) = body.split_once('.') {
673        let fractional = fractional.trim_end_matches('0');
674
675        if integer != "0" {
676            let mut digits = String::with_capacity(integer.len() + fractional.len());
677            digits.push_str(integer);
678            digits.push_str(fractional);
679            let exponent = i32::try_from(integer.len()).map_err(|_| {
680                JcsError::InvalidNumber(
681                    "formatter emitted an unexpectedly large integer part".to_string(),
682                )
683            })?;
684            return Ok((digits, exponent));
685        }
686
687        let leading_zeros = fractional.bytes().take_while(|&byte| byte == b'0').count();
688        let exponent = i32::try_from(leading_zeros).map_err(|_| {
689            JcsError::InvalidNumber(
690                "formatter emitted an unexpectedly long leading-zero run".to_string(),
691            )
692        })?;
693        return Ok((fractional[leading_zeros..].to_owned(), -exponent));
694    }
695
696    let exponent = i32::try_from(body.len()).map_err(|_| {
697        JcsError::InvalidNumber("formatter emitted an unexpectedly long integer".to_string())
698    })?;
699    Ok((body.to_owned(), exponent))
700}
701
702fn render_ecmascript_number(digits: &str, exponent: i32) -> Result<String, JcsError> {
703    let digits_len = i32::try_from(digits.len()).map_err(|_| {
704        JcsError::InvalidNumber("formatter emitted an unexpectedly long digit sequence".to_string())
705    })?;
706    if digits_len == 0 {
707        return Err(JcsError::InvalidNumber("empty digit sequence".to_string()));
708    }
709
710    if digits_len <= exponent && exponent <= 21 {
711        let capacity = usize::try_from(exponent).map_err(|_| {
712            JcsError::InvalidNumber(
713                "formatter produced a negative fixed-width exponent".to_string(),
714            )
715        })?;
716        let mut out = String::with_capacity(capacity);
717        out.push_str(digits);
718        for _ in 0..(exponent - digits_len) {
719            out.push('0');
720        }
721        return Ok(out);
722    }
723
724    if 0 < exponent && exponent <= 21 {
725        let split = usize::try_from(exponent).map_err(|_| {
726            JcsError::InvalidNumber("formatter produced a negative split exponent".to_string())
727        })?;
728        let mut out = String::with_capacity(digits.len() + 1);
729        out.push_str(&digits[..split]);
730        out.push('.');
731        out.push_str(&digits[split..]);
732        return Ok(out);
733    }
734
735    if -6 < exponent && exponent <= 0 {
736        let zeros = usize::try_from(-exponent).map_err(|_| {
737            JcsError::InvalidNumber("formatter produced an invalid negative exponent".to_string())
738        })?;
739        let mut out = String::with_capacity(2 + zeros + digits.len());
740        out.push_str("0.");
741        for _ in 0..zeros {
742            out.push('0');
743        }
744        out.push_str(digits);
745        return Ok(out);
746    }
747
748    let exponent = exponent - 1;
749    let (first, rest) = digits.split_at(1);
750    let mut out = String::with_capacity(digits.len() + 6);
751    out.push_str(first);
752    if !rest.is_empty() {
753        out.push('.');
754        out.push_str(rest);
755    }
756    out.push('e');
757    if exponent >= 0 {
758        out.push('+');
759    }
760    out.push_str(&exponent.to_string());
761    Ok(out)
762}
763
/// Order two strings by their UTF-16 code units, the property-name order
/// RFC 8785 prescribes. A string that is a strict prefix of the other
/// sorts first.
fn cmp_utf16(left: &str, right: &str) -> Ordering {
    let mut lhs = left.encode_utf16();
    let mut rhs = right.encode_utf16();
    loop {
        match (lhs.next(), rhs.next()) {
            (None, None) => return Ordering::Equal,
            (None, Some(_)) => return Ordering::Less,
            (Some(_), None) => return Ordering::Greater,
            (Some(a), Some(b)) => {
                let unit_order = a.cmp(&b);
                if unit_order != Ordering::Equal {
                    return unit_order;
                }
            }
        }
    }
}
767
/// Report whether `ch` is a Unicode noncharacter: U+FDD0..=U+FDEF, or a
/// code point whose low 16 bits are 0xFFFE or 0xFFFF (in any plane).
fn is_noncharacter(ch: char) -> bool {
    let code = u32::from(ch);
    matches!(code, 0xFDD0..=0xFDEF) || matches!(code & 0xFFFF, 0xFFFE | 0xFFFF)
}
772
773/// Sentinel prefix used by `NoDuplicateValueSeed` to signal depth exceeded
774/// through serde's error channel. Matched in `parse_json_value_no_duplicates`
775/// to promote the error to `JcsError::NestingDepthExceeded`.
776const DEPTH_EXCEEDED_SENTINEL: &str = "nesting depth exceeded maximum of ";
777
778fn parse_json_value_no_duplicates(json: &[u8]) -> Result<Value, JcsError> {
779    let mut deserializer = serde_json::Deserializer::from_slice(json);
780    // Disable serde_json's built-in recursion limit — we enforce
781    // MAX_NESTING_DEPTH via NoDuplicateValueSeed instead.
782    deserializer.disable_recursion_limit();
783    let value = deserialize_json_value_no_duplicates(&mut deserializer).map_err(|e| {
784        if e.to_string().starts_with(DEPTH_EXCEEDED_SENTINEL) {
785            JcsError::NestingDepthExceeded
786        } else {
787            JcsError::Json(e)
788        }
789    })?;
790    deserializer.end()?;
791    Ok(value)
792}
793
794struct NoDuplicateValueSeed {
795    depth: usize,
796}
797
798impl<'de> DeserializeSeed<'de> for NoDuplicateValueSeed {
799    type Value = Value;
800
801    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
802    where
803        D: Deserializer<'de>,
804    {
805        if self.depth > MAX_NESTING_DEPTH {
806            return Err(D::Error::custom(format!(
807                "{DEPTH_EXCEEDED_SENTINEL}{MAX_NESTING_DEPTH}"
808            )));
809        }
810        deserializer.deserialize_any(NoDuplicateValueVisitor { depth: self.depth })
811    }
812}
813
/// Visitor that builds a `Value` for `NoDuplicateValueSeed`, carrying the
/// nesting depth of the value being visited so that child elements can be
/// deserialized one level deeper.
struct NoDuplicateValueVisitor {
    /// Nesting level of the value this visitor produces (0 for the root).
    depth: usize,
}
817
818impl<'de> Visitor<'de> for NoDuplicateValueVisitor {
819    type Value = Value;
820
821    fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
822        formatter.write_str("a valid JSON value")
823    }
824
825    fn visit_bool<E>(self, value: bool) -> Result<Self::Value, E> {
826        Ok(Value::Bool(value))
827    }
828
829    fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E> {
830        Ok(Value::Number(Number::from(value)))
831    }
832
833    fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E> {
834        Ok(Value::Number(Number::from(value)))
835    }
836
837    fn visit_f64<E>(self, value: f64) -> Result<Self::Value, E>
838    where
839        E: de::Error,
840    {
841        Number::from_f64(value)
842            .map(Value::Number)
843            .ok_or_else(|| E::custom("encountered a non-finite floating-point number"))
844    }
845
846    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
847    where
848        E: de::Error,
849    {
850        validate_string_contents(value, "string value").map_err(E::custom)?;
851        Ok(Value::String(value.to_owned()))
852    }
853
854    fn visit_borrowed_str<E>(self, value: &'de str) -> Result<Self::Value, E>
855    where
856        E: de::Error,
857    {
858        self.visit_str(value)
859    }
860
861    fn visit_string<E>(self, value: String) -> Result<Self::Value, E>
862    where
863        E: de::Error,
864    {
865        validate_string_contents(&value, "string value").map_err(E::custom)?;
866        Ok(Value::String(value))
867    }
868
869    fn visit_none<E>(self) -> Result<Self::Value, E> {
870        Ok(Value::Null)
871    }
872
873    fn visit_unit<E>(self) -> Result<Self::Value, E> {
874        Ok(Value::Null)
875    }
876
877    fn visit_seq<A>(self, mut access: A) -> Result<Self::Value, A::Error>
878    where
879        A: SeqAccess<'de>,
880    {
881        let mut values = Vec::with_capacity(access.size_hint().unwrap_or(0));
882        while let Some(value) = access.next_element_seed(NoDuplicateValueSeed {
883            depth: self.depth + 1,
884        })? {
885            values.push(value);
886        }
887        Ok(Value::Array(values))
888    }
889
890    fn visit_map<A>(self, mut access: A) -> Result<Self::Value, A::Error>
891    where
892        A: MapAccess<'de>,
893    {
894        let Some(first_key) = access.next_key::<String>()? else {
895            return Ok(Value::Object(serde_json::Map::new()));
896        };
897
898        // Skip string validation for '$'-prefixed keys: serde_json uses
899        // internal sentinels (e.g. "$serde_json::private::Number") under
900        // arbitrary_precision. This intentionally over-matches — a user
901        // key like "$ref" containing a noncharacter would bypass
902        // validation. Acceptable because noncharacters in '$'-prefixed
903        // keys are vanishingly unlikely in practice.
904        if !first_key.starts_with('$') {
905            validate_string_contents(&first_key, "object property name")
906                .map_err(A::Error::custom)?;
907        }
908
909        let first_value = access.next_value_seed(NoDuplicateValueSeed {
910            depth: self.depth + 1,
911        })?;
912
913        let mut object = serde_json::Map::new();
914        object.insert(first_key.clone(), first_value);
915
916        let mut seen = BTreeSet::new();
917        seen.insert(first_key);
918
919        while let Some(key) = access.next_key::<String>()? {
920            // Same '$'-prefix skip as above (see first-key comment).
921            if !key.starts_with('$') {
922                validate_string_contents(&key, "object property name").map_err(A::Error::custom)?;
923            }
924
925            if !seen.insert(key.clone()) {
926                return Err(A::Error::custom(format!("duplicate property name `{key}`")));
927            }
928
929            let value = access.next_value_seed(NoDuplicateValueSeed {
930                depth: self.depth + 1,
931            })?;
932            object.insert(key, value);
933        }
934
935        // If the map is a serde_json internal number representation,
936        // serde_json::from_value will reconstruct the proper Number.
937        // For real JSON objects, this is a no-op identity conversion.
938        serde_json::from_value(Value::Object(object)).map_err(A::Error::custom)
939    }
940}
941
// Unit tests are kept in the sibling file `lib_tests.rs` and are only
// compiled for test builds.
#[cfg(test)]
#[path = "lib_tests.rs"]
mod tests;