mnem_core/codec/json.rs
1//! Canonical `serde_json::Value` -> [`Ipld`] conversion for untrusted input.
2//!
3//! Three surfaces feed untrusted JSON into mnem: the CLI (`mnem ...
4//! --prop key=value`), `mnem-http` (request bodies on `/v1/*`), and
5//! `mnem-mcp` (tool-call `arguments` objects). Before `json_to_ipld`
6//! lived here, each of those crates carried its own near-identical
7//! implementation, each with its own copy of [`IPLD_MAX_DEPTH`], its
8//! own `u64 > i64::MAX` rejection path, and its own error type
9//! (`anyhow::Result`, `Result<_, String>`, `anyhow::Result`). Every
10//! future hardening change had to be replicated across three files,
11//! and the three were already out-of-sync in subtle ways (error
12//! message wording drift, different comment wording).
13//!
14//! This module is the canonical implementation. All three callers
15//! re-export [`json_to_ipld`] and adapt [`JsonIpldError`] to their
16//! local error boundary:
17//!
18//! - `mnem-cli`: `?` through `anyhow::Error` (the library `Display`
19//! impl threads directly).
20//! - `mnem-http`: `map_err` to `mnem_http::error::Error::BadRequest` so
21//! a malformed JSON body returns HTTP 400 with a specific reason.
22//! - `mnem-mcp`: `map_err` to an MCP `error.invalid_params` response
23//! carrying the same `Display` string as a structured field.
24//!
25//! ## Hardening
26//!
27//! Two concrete attacker-controlled inputs motivate this module's
28//! shape:
29//!
30//! 1. **Deeply-nested arrays/objects.** A stock recursive-descent
31//! converter stack-overflows on `[[[[[[[...]]]]]]]` with a few
32//! thousand levels of nesting. [`IPLD_MAX_DEPTH`] caps the
33//! traversal at 64 levels, matching [`crate::codec::dagcbor::WALK_IPLD_MAX_DEPTH`]
34//! so a payload cannot pass this check and then fail further down
35//! the pipeline.
36//! 2. **Unsigned ids above `i64::MAX`.** Silently demoting such a
37//! value to [`Ipld::Float`] loses precision above 2^53 (a 19-digit
38//! id becomes a rounded double). Reject instead: callers that
39//! really need a 64-bit unsigned id must send it as a string.
40
41use std::collections::BTreeMap;
42
43use ipld_core::ipld::Ipld;
44use serde_json::Value;
45use thiserror::Error;
46
/// Nesting cap enforced by [`json_to_ipld`].
///
/// The converter keeps a depth counter that is 0 for the root value
/// and grows by one each time it steps into an array or object. Any
/// value reached with the counter at `IPLD_MAX_DEPTH` or above is
/// refused with [`JsonIpldError::DepthExceeded`] rather than recursed
/// into. 64 leaves generous headroom over legitimate agent-memory
/// props (which rarely nest past ~6) while stopping a hostile payload
/// from cheaply shipping arbitrary depth and overflowing the stack.
///
/// Keep this equal to
/// [`crate::codec::dagcbor::WALK_IPLD_MAX_DEPTH`]. If the two caps
/// drift apart, a payload could be accepted at the input layer and
/// then fail to decode after a round-trip through DAG-CBOR; with the
/// caps matched there is no gap between "accepted on the wire" and
/// "decodable afterwards".
pub const IPLD_MAX_DEPTH: usize = 64;
59
60/// Failure modes for [`json_to_ipld`].
61///
62/// Deliberately coarse-grained; each variant carries enough detail
63/// for a caller to render a user-facing error without string-parsing
64/// the `Display` output.
65#[derive(Debug, Error)]
66#[non_exhaustive]
67pub enum JsonIpldError {
68 /// The input exceeded [`IPLD_MAX_DEPTH`] levels of nesting.
69 #[error("json_to_ipld: nesting exceeds depth cap of {cap}")]
70 DepthExceeded {
71 /// The cap that was exceeded; always equals [`IPLD_MAX_DEPTH`].
72 cap: usize,
73 },
74 /// A JSON `Number` was an unsigned integer greater than
75 /// `i64::MAX`. Such values cannot round-trip through
76 /// [`Ipld::Integer`] (which is `i128` but DAG-CBOR encodes only
77 /// `i64` / `u64`) without ambiguity; the old "demote to
78 /// [`Ipld::Float`]" path silently lost precision above 2^53.
79 #[error("json_to_ipld: unsigned integer {value} exceeds i64::MAX; send as a string if id-like")]
80 UnsignedOverflow {
81 /// The rejected value, rendered as it appeared in the input.
82 value: String,
83 },
84 /// A JSON `Number` was neither an `i64`, a `u64`, nor a finite
85 /// `f64`. In practice this cannot happen from `serde_json` today
86 /// (the `Number` variants exhaust the space) but is kept as a
87 /// defensive catch-all.
88 #[error("json_to_ipld: unsupported JSON number {value}")]
89 UnsupportedNumber {
90 /// The rejected value, rendered as it appeared in the input.
91 value: String,
92 },
93}
94
95/// Convert a [`serde_json::Value`] into an [`Ipld`] value, rejecting
96/// deeply-nested or precision-losing inputs.
97///
98/// # Errors
99///
100/// Returns [`JsonIpldError::DepthExceeded`] if the input nests past
101/// [`IPLD_MAX_DEPTH`]; [`JsonIpldError::UnsignedOverflow`] if a
102/// numeric field is `> i64::MAX`; [`JsonIpldError::UnsupportedNumber`]
103/// for any other unhandled numeric shape.
104pub fn json_to_ipld(v: &Value) -> Result<Ipld, JsonIpldError> {
105 json_to_ipld_at(v, 0)
106}
107
108fn json_to_ipld_at(v: &Value, depth: usize) -> Result<Ipld, JsonIpldError> {
109 if depth >= IPLD_MAX_DEPTH {
110 return Err(JsonIpldError::DepthExceeded {
111 cap: IPLD_MAX_DEPTH,
112 });
113 }
114 Ok(match v {
115 Value::Null => Ipld::Null,
116 Value::Bool(b) => Ipld::Bool(*b),
117 Value::Number(n) => {
118 if let Some(i) = n.as_i64() {
119 Ipld::Integer(i128::from(i))
120 } else if n.is_u64() {
121 return Err(JsonIpldError::UnsignedOverflow {
122 value: n.to_string(),
123 });
124 } else if let Some(f) = n.as_f64() {
125 Ipld::Float(f)
126 } else {
127 return Err(JsonIpldError::UnsupportedNumber {
128 value: n.to_string(),
129 });
130 }
131 }
132 Value::String(s) => Ipld::String(s.clone()),
133 Value::Array(xs) => Ipld::List(
134 xs.iter()
135 .map(|x| json_to_ipld_at(x, depth + 1))
136 .collect::<Result<Vec<_>, _>>()?,
137 ),
138 Value::Object(m) => {
139 let mut out = BTreeMap::new();
140 for (k, v) in m {
141 out.insert(k.clone(), json_to_ipld_at(v, depth + 1)?);
142 }
143 Ipld::Map(out)
144 }
145 })
146}
147
#[cfg(test)]
mod tests {
    //! Unit tests for [`json_to_ipld`].
    //!
    //! Besides the basic scalar/container mappings these pin the two
    //! hardening boundaries exactly: the first nesting level that is
    //! rejected, and the first unsigned integer that is rejected.

    use super::*;
    use serde_json::json;

    /// Wrap `Value::Null` in `levels` single-element arrays.
    fn nested_arrays(levels: usize) -> Value {
        (0..levels).fold(Value::Null, |inner, _| Value::Array(vec![inner]))
    }

    /// Wrap `Value::Null` in `levels` single-key objects:
    /// `{a: {a: ... {a: null}}}`.
    fn nested_objects(levels: usize) -> Value {
        (0..levels).fold(Value::Null, |inner, _| {
            let mut m = serde_json::Map::new();
            m.insert("a".into(), inner);
            Value::Object(m)
        })
    }

    #[test]
    fn null_bool_string_roundtrip() {
        assert_eq!(json_to_ipld(&Value::Null).unwrap(), Ipld::Null);
        assert_eq!(json_to_ipld(&json!(true)).unwrap(), Ipld::Bool(true));
        assert_eq!(json_to_ipld(&json!(false)).unwrap(), Ipld::Bool(false));
        assert_eq!(
            json_to_ipld(&json!("hello")).unwrap(),
            Ipld::String("hello".to_string())
        );
    }

    #[test]
    fn i64_as_integer() {
        assert_eq!(
            json_to_ipld(&json!(42_i64)).unwrap(),
            Ipld::Integer(42_i128)
        );
        assert_eq!(
            json_to_ipld(&json!(-7_i64)).unwrap(),
            Ipld::Integer(-7_i128)
        );
        assert_eq!(
            json_to_ipld(&json!(i64::MIN)).unwrap(),
            Ipld::Integer(i128::from(i64::MIN))
        );
        assert_eq!(
            json_to_ipld(&json!(i64::MAX)).unwrap(),
            Ipld::Integer(i128::from(i64::MAX))
        );
    }

    #[test]
    fn u64_gt_i64_max_rejected() {
        let err = json_to_ipld(&json!(u64::MAX)).unwrap_err();
        assert!(matches!(err, JsonIpldError::UnsignedOverflow { .. }));
    }

    #[test]
    fn u64_just_above_i64_max_rejected() {
        // i64::MAX + 1 is the smallest value that must be refused;
        // i64::MAX itself is covered by `i64_as_integer`.
        let err = json_to_ipld(&json!(9_223_372_036_854_775_808_u64)).unwrap_err();
        match err {
            JsonIpldError::UnsignedOverflow { value } => {
                assert_eq!(value, "9223372036854775808");
            }
            other => panic!("expected UnsignedOverflow, got {other:?}"),
        }
    }

    #[test]
    fn float_preserved() {
        assert_eq!(json_to_ipld(&json!(1.5_f64)).unwrap(), Ipld::Float(1.5));
        assert_eq!(json_to_ipld(&json!(-0.5_f64)).unwrap(), Ipld::Float(-0.5));
        // A float with an integral value must stay a float.
        assert_eq!(json_to_ipld(&json!(2.0_f64)).unwrap(), Ipld::Float(2.0));
    }

    #[test]
    fn deeply_nested_rejected() {
        // 128 levels of array nesting - well past the 64 cap.
        let err = json_to_ipld(&nested_arrays(128)).unwrap_err();
        assert!(matches!(
            err,
            JsonIpldError::DepthExceeded {
                cap: IPLD_MAX_DEPTH
            }
        ));
    }

    #[test]
    fn nested_map_respects_cap() {
        // 65 nested objects: {a: {a: {a: ... {a: null}}}}
        let err = json_to_ipld(&nested_objects(65)).unwrap_err();
        assert!(matches!(err, JsonIpldError::DepthExceeded { .. }));
    }

    #[test]
    fn depth_cap_boundary_is_exact() {
        // A scalar wrapped in `k` containers is visited at depth `k`,
        // and the converter refuses any value at depth >= the cap.
        // So `cap - 1` wrappers is the deepest accepted shape and
        // `cap` wrappers is the shallowest rejected one.
        assert!(json_to_ipld(&nested_arrays(IPLD_MAX_DEPTH - 1)).is_ok());
        assert!(json_to_ipld(&nested_objects(IPLD_MAX_DEPTH - 1)).is_ok());

        for too_deep in [
            nested_arrays(IPLD_MAX_DEPTH),
            nested_objects(IPLD_MAX_DEPTH),
        ] {
            let err = json_to_ipld(&too_deep).unwrap_err();
            assert!(matches!(
                err,
                JsonIpldError::DepthExceeded {
                    cap: IPLD_MAX_DEPTH
                }
            ));
        }
    }

    #[test]
    fn shallow_nesting_ok() {
        // 10 levels of nesting: comfortably under the 64 cap.
        let out = json_to_ipld(&nested_arrays(10)).unwrap();
        assert!(matches!(out, Ipld::List(_)));
    }

    #[test]
    fn empty_containers_ok() {
        assert_eq!(json_to_ipld(&json!([])).unwrap(), Ipld::List(Vec::new()));
        assert_eq!(
            json_to_ipld(&json!({})).unwrap(),
            Ipld::Map(BTreeMap::new())
        );
    }

    #[test]
    fn array_and_object_mixed() {
        let v = json!({
            "name": "a",
            "xs": [1, 2, 3],
            "meta": { "kind": "note", "active": true }
        });
        let out = json_to_ipld(&v).unwrap();

        // Compare the whole tree, not just key presence, so that a
        // wrong value conversion inside a container is caught.
        let expected = Ipld::Map(BTreeMap::from([
            ("name".to_string(), Ipld::String("a".to_string())),
            (
                "xs".to_string(),
                Ipld::List(vec![
                    Ipld::Integer(1),
                    Ipld::Integer(2),
                    Ipld::Integer(3),
                ]),
            ),
            (
                "meta".to_string(),
                Ipld::Map(BTreeMap::from([
                    ("kind".to_string(), Ipld::String("note".to_string())),
                    ("active".to_string(), Ipld::Bool(true)),
                ])),
            ),
        ]));
        assert_eq!(out, expected);
    }

    #[test]
    fn error_inside_container_propagates() {
        // A rejected number must fail the whole conversion even when
        // it is buried inside otherwise valid containers.
        let v = json!({ "ids": [1, u64::MAX] });
        let err = json_to_ipld(&v).unwrap_err();
        assert!(matches!(err, JsonIpldError::UnsignedOverflow { .. }));
    }

    #[test]
    fn error_display_strings() {
        // Callers forward these strings to users (HTTP 400 bodies,
        // MCP error fields), so pin the wording.
        let depth = json_to_ipld(&nested_arrays(IPLD_MAX_DEPTH)).unwrap_err();
        assert_eq!(
            depth.to_string(),
            "json_to_ipld: nesting exceeds depth cap of 64"
        );

        let overflow = json_to_ipld(&json!(u64::MAX)).unwrap_err();
        assert_eq!(
            overflow.to_string(),
            "json_to_ipld: unsigned integer 18446744073709551615 exceeds i64::MAX; \
             send as a string if id-like"
        );
    }
}