Skip to main content

mnem_core/codec/
json.rs

//! Canonical `serde_json::Value` -> [`Ipld`] conversion for untrusted input.
//!
//! Three surfaces feed untrusted JSON into mnem: the CLI (`mnem ...
//! --prop key=value`), `mnem-http` (request bodies on `/v1/*`), and
//! `mnem-mcp` (tool-call `arguments` objects). Before `json_to_ipld`
//! lived here, each of those crates carried its own near-identical
//! implementation, each with its own copy of [`IPLD_MAX_DEPTH`], its
//! own `u64 > i64::MAX` rejection path, and its own error type
//! (`anyhow::Result`, `Result<_, String>`, `anyhow::Result`). Every
//! future hardening change had to be replicated across three files,
//! and the three were already out-of-sync in subtle ways (error
//! message wording drift, different comment wording).
//!
//! This module is the canonical implementation. All three callers
//! re-export [`json_to_ipld`] and adapt [`JsonIpldError`] to their
//! local error boundary:
//!
//! - `mnem-cli`: `?` through `anyhow::Error` (the library `Display`
//!   impl threads directly).
//! - `mnem-http`: `map_err` to `mnem_http::error::Error::BadRequest` so
//!   a malformed JSON body returns HTTP 400 with a specific reason.
//! - `mnem-mcp`: `map_err` to an MCP `error.invalid_params` response
//!   carrying the same `Display` string as a structured field.
//!
//! ## Hardening
//!
//! Two concrete attacker-controlled inputs motivate this module's
//! shape:
//!
//! 1. **Deeply-nested arrays/objects.** A stock recursive-descent
//!    converter stack-overflows on `[[[[[[[...]]]]]]]` with a few
//!    thousand levels of nesting. [`IPLD_MAX_DEPTH`] caps the
//!    traversal at 64 levels, matching [`crate::codec::dagcbor::WALK_IPLD_MAX_DEPTH`]
//!    so a payload cannot pass this check and then fail further down
//!    the pipeline.
//! 2. **Unsigned ids above `i64::MAX`.** Silently demoting such a
//!    value to [`Ipld::Float`] loses precision above 2^53 (a 19-digit
//!    id becomes a rounded double). Reject instead: callers that
//!    really need a 64-bit unsigned id must send it as a string.
40
41use std::collections::BTreeMap;
42
43use ipld_core::ipld::Ipld;
44use serde_json::Value;
45use thiserror::Error;
46
/// Nesting limit enforced by [`json_to_ipld`].
///
/// The root value sits at depth 0 and every array element or object
/// member sits one level below its container. Any value found at a
/// depth of `IPLD_MAX_DEPTH` or more makes the conversion fail with
/// [`JsonIpldError::DepthExceeded`] instead of recursing further.
///
/// 64 was chosen because legitimate agent-memory props rarely nest
/// past ~6, whereas a hostile payload can cheaply ship arbitrary
/// depth and stack-overflow the process.
///
/// This MUST stay equal to
/// [`crate::codec::dagcbor::WALK_IPLD_MAX_DEPTH`]: a payload that
/// clears the input-layer cap must also clear the decode-layer cap,
/// so there is no gap between "accepted on the wire" and "decodable
/// after a round-trip through DAG-CBOR".
pub const IPLD_MAX_DEPTH: usize = 64;
59
60/// Failure modes for [`json_to_ipld`].
61///
62/// Deliberately coarse-grained; each variant carries enough detail
63/// for a caller to render a user-facing error without string-parsing
64/// the `Display` output.
65#[derive(Debug, Error)]
66#[non_exhaustive]
67pub enum JsonIpldError {
68    /// The input exceeded [`IPLD_MAX_DEPTH`] levels of nesting.
69    #[error("json_to_ipld: nesting exceeds depth cap of {cap}")]
70    DepthExceeded {
71        /// The cap that was exceeded; always equals [`IPLD_MAX_DEPTH`].
72        cap: usize,
73    },
74    /// A JSON `Number` was an unsigned integer greater than
75    /// `i64::MAX`. Such values cannot round-trip through
76    /// [`Ipld::Integer`] (which is `i128` but DAG-CBOR encodes only
77    /// `i64` / `u64`) without ambiguity; the old "demote to
78    /// [`Ipld::Float`]" path silently lost precision above 2^53.
79    #[error("json_to_ipld: unsigned integer {value} exceeds i64::MAX; send as a string if id-like")]
80    UnsignedOverflow {
81        /// The rejected value, rendered as it appeared in the input.
82        value: String,
83    },
84    /// A JSON `Number` was neither an `i64`, a `u64`, nor a finite
85    /// `f64`. In practice this cannot happen from `serde_json` today
86    /// (the `Number` variants exhaust the space) but is kept as a
87    /// defensive catch-all.
88    #[error("json_to_ipld: unsupported JSON number {value}")]
89    UnsupportedNumber {
90        /// The rejected value, rendered as it appeared in the input.
91        value: String,
92    },
93}
94
95/// Convert a [`serde_json::Value`] into an [`Ipld`] value, rejecting
96/// deeply-nested or precision-losing inputs.
97///
98/// # Errors
99///
100/// Returns [`JsonIpldError::DepthExceeded`] if the input nests past
101/// [`IPLD_MAX_DEPTH`]; [`JsonIpldError::UnsignedOverflow`] if a
102/// numeric field is `> i64::MAX`; [`JsonIpldError::UnsupportedNumber`]
103/// for any other unhandled numeric shape.
104pub fn json_to_ipld(v: &Value) -> Result<Ipld, JsonIpldError> {
105    json_to_ipld_at(v, 0)
106}
107
108fn json_to_ipld_at(v: &Value, depth: usize) -> Result<Ipld, JsonIpldError> {
109    if depth >= IPLD_MAX_DEPTH {
110        return Err(JsonIpldError::DepthExceeded {
111            cap: IPLD_MAX_DEPTH,
112        });
113    }
114    Ok(match v {
115        Value::Null => Ipld::Null,
116        Value::Bool(b) => Ipld::Bool(*b),
117        Value::Number(n) => {
118            if let Some(i) = n.as_i64() {
119                Ipld::Integer(i128::from(i))
120            } else if n.is_u64() {
121                return Err(JsonIpldError::UnsignedOverflow {
122                    value: n.to_string(),
123                });
124            } else if let Some(f) = n.as_f64() {
125                Ipld::Float(f)
126            } else {
127                return Err(JsonIpldError::UnsupportedNumber {
128                    value: n.to_string(),
129                });
130            }
131        }
132        Value::String(s) => Ipld::String(s.clone()),
133        Value::Array(xs) => Ipld::List(
134            xs.iter()
135                .map(|x| json_to_ipld_at(x, depth + 1))
136                .collect::<Result<Vec<_>, _>>()?,
137        ),
138        Value::Object(m) => {
139            let mut out = BTreeMap::new();
140            for (k, v) in m {
141                out.insert(k.clone(), json_to_ipld_at(v, depth + 1)?);
142            }
143            Ipld::Map(out)
144        }
145    })
146}
147
#[cfg(test)]
mod tests {
    //! Unit tests for [`json_to_ipld`].
    //!
    //! Besides the happy paths, these pin the two hardening
    //! boundaries exactly (the depth cap and the first rejected
    //! unsigned integer) and the `Display` strings that callers
    //! forward to users.

    use super::*;
    use serde_json::json;

    /// Wrap `leaf` in `levels` single-element arrays.
    fn nest_arrays(leaf: Value, levels: usize) -> Value {
        (0..levels).fold(leaf, |inner, _| Value::Array(vec![inner]))
    }

    /// Wrap `leaf` in `levels` single-key objects (`{"a": ...}`).
    fn nest_objects(leaf: Value, levels: usize) -> Value {
        (0..levels).fold(leaf, |inner, _| {
            let mut wrapper = serde_json::Map::new();
            wrapper.insert("a".to_string(), inner);
            Value::Object(wrapper)
        })
    }

    #[test]
    fn null_bool_string_roundtrip() {
        assert_eq!(json_to_ipld(&Value::Null).unwrap(), Ipld::Null);
        assert_eq!(json_to_ipld(&json!(true)).unwrap(), Ipld::Bool(true));
        assert_eq!(
            json_to_ipld(&json!("hello")).unwrap(),
            Ipld::String("hello".to_string())
        );
    }

    #[test]
    fn i64_as_integer() {
        assert_eq!(
            json_to_ipld(&json!(42_i64)).unwrap(),
            Ipld::Integer(42_i128)
        );
        assert_eq!(json_to_ipld(&json!(0)).unwrap(), Ipld::Integer(0));
        assert_eq!(json_to_ipld(&json!(-7)).unwrap(), Ipld::Integer(-7));
        assert_eq!(
            json_to_ipld(&json!(i64::MIN)).unwrap(),
            Ipld::Integer(i128::from(i64::MIN))
        );
        assert_eq!(
            json_to_ipld(&json!(i64::MAX)).unwrap(),
            Ipld::Integer(i128::from(i64::MAX))
        );
    }

    #[test]
    fn u64_gt_i64_max_rejected() {
        let err = json_to_ipld(&json!(u64::MAX)).unwrap_err();
        assert!(matches!(err, JsonIpldError::UnsignedOverflow { .. }));
    }

    #[test]
    fn first_value_above_i64_max_rejected() {
        // 2^63 is exactly `i64::MAX + 1`: the smallest value that must
        // be refused.
        let first_rejected = 1_u64 << 63;
        let err = json_to_ipld(&json!(first_rejected)).unwrap_err();
        let JsonIpldError::UnsignedOverflow { value } = err else {
            panic!("expected UnsignedOverflow");
        };
        assert_eq!(value, first_rejected.to_string());
    }

    #[test]
    fn float_preserved() {
        assert_eq!(json_to_ipld(&json!(1.5_f64)).unwrap(), Ipld::Float(1.5));
    }

    #[test]
    fn integral_float_stays_float() {
        // A float with no fractional part must not be turned into an
        // integer.
        assert_eq!(json_to_ipld(&json!(2.0_f64)).unwrap(), Ipld::Float(2.0));
    }

    #[test]
    fn deeply_nested_rejected() {
        // 128 levels of array nesting - well past the 64 cap.
        let v = nest_arrays(Value::Null, 128);
        let err = json_to_ipld(&v).unwrap_err();
        assert!(matches!(
            err,
            JsonIpldError::DepthExceeded {
                cap: IPLD_MAX_DEPTH
            }
        ));
    }

    #[test]
    fn nested_map_respects_cap() {
        // 65 nested objects: {a: {a: {a: ... {a: null}}}}
        let v = nest_objects(Value::Null, 65);
        let err = json_to_ipld(&v).unwrap_err();
        assert!(matches!(err, JsonIpldError::DepthExceeded { .. }));
    }

    #[test]
    fn depth_cap_boundary_is_exact() {
        // With `IPLD_MAX_DEPTH - 1` wrappers the leaf sits at depth
        // `IPLD_MAX_DEPTH - 1`, the deepest accepted level.
        let deepest_ok = IPLD_MAX_DEPTH - 1;
        assert!(json_to_ipld(&nest_arrays(Value::Null, deepest_ok)).is_ok());
        assert!(json_to_ipld(&nest_objects(Value::Null, deepest_ok)).is_ok());

        // One more wrapper pushes the leaf to depth `IPLD_MAX_DEPTH`.
        for v in [
            nest_arrays(Value::Null, IPLD_MAX_DEPTH),
            nest_objects(Value::Null, IPLD_MAX_DEPTH),
        ] {
            let err = json_to_ipld(&v).unwrap_err();
            assert!(matches!(
                err,
                JsonIpldError::DepthExceeded {
                    cap: IPLD_MAX_DEPTH
                }
            ));
        }
    }

    #[test]
    fn shallow_nesting_ok() {
        // 10 levels of nesting: comfortably under the 64 cap.
        let v = nest_arrays(Value::Null, 10);
        assert!(json_to_ipld(&v).is_ok());
    }

    #[test]
    fn error_nested_inside_container_propagates() {
        // A rejected number buried in an array inside an object must
        // fail the whole conversion.
        let v = json!({ "ids": [1, u64::MAX] });
        let err = json_to_ipld(&v).unwrap_err();
        assert!(matches!(err, JsonIpldError::UnsignedOverflow { .. }));
    }

    #[test]
    fn display_messages_are_stable() {
        // Callers forward these strings to users, so pin the wording.
        let depth_err = json_to_ipld(&nest_arrays(Value::Null, 128)).unwrap_err();
        assert_eq!(
            depth_err.to_string(),
            format!("json_to_ipld: nesting exceeds depth cap of {IPLD_MAX_DEPTH}")
        );

        let overflow_err = json_to_ipld(&json!(u64::MAX)).unwrap_err();
        assert_eq!(
            overflow_err.to_string(),
            format!(
                "json_to_ipld: unsigned integer {} exceeds i64::MAX; send as a string if id-like",
                u64::MAX
            )
        );
    }

    #[test]
    fn array_and_object_mixed() {
        let v = json!({
            "name": "a",
            "xs": [1, 2, 3],
            "meta": { "kind": "note", "active": true }
        });

        let meta = Ipld::Map(BTreeMap::from([
            ("kind".to_string(), Ipld::String("note".to_string())),
            ("active".to_string(), Ipld::Bool(true)),
        ]));
        let expected = Ipld::Map(BTreeMap::from([
            ("name".to_string(), Ipld::String("a".to_string())),
            (
                "xs".to_string(),
                Ipld::List(vec![
                    Ipld::Integer(1),
                    Ipld::Integer(2),
                    Ipld::Integer(3),
                ]),
            ),
            ("meta".to_string(), meta),
        ]));

        assert_eq!(json_to_ipld(&v).unwrap(), expected);
    }
}