Skip to main content

wallfacer_core/mutate/
compose.rs

1//! JSON Schema composition resolution: `$ref`, `$defs`/`definitions`, and
2//! `allOf` merging.
3//!
4//! The runtime composition keywords (`oneOf`, `anyOf`, `not`, `if/then/else`,
5//! `dependentRequired`) are handled in [`crate::mutate::schema_gen`] because
6//! they require RNG-driven choices and rejection sampling. This module
7//! provides the deterministic, side-effect-free pieces.
8//!
9//! # Limits
10//!
11//! * Only **local** references are resolved (`#/$defs/...`,
12//!   `#/definitions/...`, or `#/...` JSON pointers into the root document).
13//!   Remote refs (`http://...`, `file:...`) are returned as
14//!   [`ComposeError::ExternalRef`] and the caller should treat the schema as
15//!   a skip.
16//! * A `$ref` cycle returns [`ComposeError::Cycle`] once the same pointer is
17//!   visited a second time on the current resolution path.
18//! * `allOf` is merged with a pragmatic *last-write-wins per keyword* policy
19//!   except for `properties` (deep-merged), `required` (set union), and
20//!   `type` (first non-conflicting wins).
21
22use std::collections::BTreeSet;
23
24use serde_json::{Map, Value};
25use thiserror::Error;
26
/// Maximum number of `$ref` hops [`dereference`] follows for a single schema.
///
/// A longer chain is reported as [`ComposeError::DepthExceeded`], so a
/// pathological schema cannot trap the generator in an unbounded walk.
pub const MAX_REF_DEPTH: usize = 16;
31
32/// Errors raised by composition resolution.
33#[derive(Debug, Error)]
34pub enum ComposeError {
35    /// Reference uses a non-local URI.
36    #[error("external `$ref` is not supported: {0}")]
37    ExternalRef(String),
38    /// JSON pointer did not resolve to a definition.
39    #[error("could not resolve `$ref` `{0}`")]
40    UnresolvedRef(String),
41    /// Reference cycle detected.
42    #[error("cyclic `$ref` detected at `{0}`")]
43    Cycle(String),
44    /// Reference chain exceeded [`MAX_REF_DEPTH`].
45    #[error("`$ref` chain exceeded depth {MAX_REF_DEPTH} at `{0}`")]
46    DepthExceeded(String),
47}
48
49/// Result alias for composition operations.
50pub type Result<T> = std::result::Result<T, ComposeError>;
51
52/// Returns a copy of `schema` with any `$ref` resolved against `root`. If the
53/// schema does not contain `$ref`, returns the schema unchanged. Recurses
54/// into nested `$ref`s up to [`MAX_REF_DEPTH`].
55///
56/// `seen` accumulates pointers visited on the current resolution path for
57/// cycle detection. Pass an empty `BTreeSet` for the first call.
58pub fn dereference(root: &Value, schema: &Value, seen: &mut BTreeSet<String>) -> Result<Value> {
59    let mut current = schema.clone();
60    let mut depth = 0usize;
61    while let Some(ref_str) = current
62        .as_object()
63        .and_then(|map| map.get("$ref"))
64        .and_then(Value::as_str)
65    {
66        if !seen.insert(ref_str.to_string()) {
67            return Err(ComposeError::Cycle(ref_str.to_string()));
68        }
69        if depth >= MAX_REF_DEPTH {
70            return Err(ComposeError::DepthExceeded(ref_str.to_string()));
71        }
72        depth += 1;
73        let resolved = resolve_pointer(root, ref_str)?;
74        // The reference may carry sibling keys (rare but legal in 2020-12).
75        // Merge them on top of the resolved schema.
76        if let Some(siblings) = current.as_object() {
77            let mut merged = resolved.as_object().cloned().unwrap_or_default();
78            for (key, value) in siblings {
79                if key != "$ref" {
80                    merged.insert(key.clone(), value.clone());
81                }
82            }
83            current = Value::Object(merged);
84        } else {
85            current = resolved;
86        }
87    }
88    Ok(current)
89}
90
91/// Resolves a JSON Pointer style `$ref` against the root document.
92/// Accepts `#/$defs/foo`, `#/definitions/foo`, or arbitrary `#/path/to/x`.
93fn resolve_pointer(root: &Value, ref_str: &str) -> Result<Value> {
94    let pointer = ref_str.strip_prefix('#').ok_or_else(|| {
95        // Anything that does not start with `#` is treated as external.
96        ComposeError::ExternalRef(ref_str.to_string())
97    })?;
98    if pointer.is_empty() {
99        return Ok(root.clone());
100    }
101    root.pointer(pointer)
102        .cloned()
103        .ok_or_else(|| ComposeError::UnresolvedRef(ref_str.to_string()))
104}
105
106/// Merges an `allOf` array into a single schema. Each entry is dereferenced
107/// against `root` first.
108///
109/// Merge rules:
110///
111/// * `type`: kept from the first sub-schema; conflicting types are silently
112///   ignored to keep generation possible. Validators will still flag a real
113///   conflict separately.
114/// * `properties`: deep-merged. If two sub-schemas define the same property,
115///   the right-hand one's value wins.
116/// * `required`: set union.
117/// * `enum`: kept from the first sub-schema.
118/// * Any other keyword: last non-null write wins.
119pub fn merge_all_of(root: &Value, schemas: &[Value]) -> Result<Value> {
120    let mut merged = Map::new();
121    let mut required: BTreeSet<String> = BTreeSet::new();
122
123    for schema in schemas {
124        let mut seen = BTreeSet::new();
125        let resolved = dereference(root, schema, &mut seen)?;
126        let Some(object) = resolved.as_object() else {
127            continue;
128        };
129        for (key, value) in object {
130            match key.as_str() {
131                "properties" => {
132                    let entry = merged
133                        .entry(key.clone())
134                        .or_insert_with(|| Value::Object(Map::new()));
135                    if let (Some(target), Some(source)) = (entry.as_object_mut(), value.as_object())
136                    {
137                        for (prop_key, prop_value) in source {
138                            target.insert(prop_key.clone(), prop_value.clone());
139                        }
140                    }
141                }
142                "required" => {
143                    if let Some(items) = value.as_array() {
144                        for item in items {
145                            if let Some(name) = item.as_str() {
146                                required.insert(name.to_string());
147                            }
148                        }
149                    }
150                }
151                "type" => {
152                    merged.entry(key.clone()).or_insert_with(|| value.clone());
153                }
154                _ => {
155                    merged.insert(key.clone(), value.clone());
156                }
157            }
158        }
159    }
160
161    if !required.is_empty() {
162        merged.insert(
163            "required".to_string(),
164            Value::Array(required.into_iter().map(Value::String).collect()),
165        );
166    }
167
168    Ok(Value::Object(merged))
169}
170
#[cfg(test)]
// Tests fail by panicking, so unwrap/expect/panic are the intended tools here.
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a root document whose `$defs` hold `len` entries `D0..`, each
    /// referencing the next, with the last one being `{"type": "null"}`.
    fn ref_chain(len: usize) -> Value {
        let mut defs = Map::new();
        for index in 0..len {
            let body = if index + 1 == len {
                json!({"type": "null"})
            } else {
                json!({"$ref": format!("#/$defs/D{}", index + 1)})
            };
            defs.insert(format!("D{index}"), body);
        }
        let mut root = Map::new();
        root.insert("$defs".to_string(), Value::Object(defs));
        Value::Object(root)
    }

    #[test]
    fn dereference_resolves_local_ref() {
        let root = json!({
            "$defs": {
                "User": {"type": "object", "properties": {"name": {"type": "string"}}}
            },
            "type": "object",
            "properties": {"user": {"$ref": "#/$defs/User"}}
        });
        let schema = json!({"$ref": "#/$defs/User"});
        let mut seen = BTreeSet::new();
        let resolved = dereference(&root, &schema, &mut seen).unwrap();
        assert_eq!(resolved["type"], json!("object"));
    }

    #[test]
    fn dereference_resolves_definitions_alias() {
        let root = json!({
            "definitions": {"X": {"type": "integer"}}
        });
        let schema = json!({"$ref": "#/definitions/X"});
        let mut seen = BTreeSet::new();
        let resolved = dereference(&root, &schema, &mut seen).unwrap();
        assert_eq!(resolved["type"], json!("integer"));
    }

    #[test]
    fn dereference_resolves_root_ref() {
        let root = json!({"type": "boolean"});
        let schema = json!({"$ref": "#"});
        let mut seen = BTreeSet::new();
        let resolved = dereference(&root, &schema, &mut seen).unwrap();
        assert_eq!(resolved, root);
    }

    #[test]
    fn dereference_detects_cycle() {
        let root = json!({
            "$defs": {
                "A": {"$ref": "#/$defs/B"},
                "B": {"$ref": "#/$defs/A"}
            }
        });
        let schema = json!({"$ref": "#/$defs/A"});
        let mut seen = BTreeSet::new();
        let err = dereference(&root, &schema, &mut seen).unwrap_err();
        assert!(matches!(err, ComposeError::Cycle(_)));
    }

    #[test]
    fn dereference_enforces_depth_limit() {
        let start = json!({"$ref": "#/$defs/D0"});

        // Exactly MAX_REF_DEPTH hops is still allowed.
        let root = ref_chain(MAX_REF_DEPTH);
        let resolved = dereference(&root, &start, &mut BTreeSet::new()).unwrap();
        assert_eq!(resolved["type"], json!("null"));

        // One more hop trips the limit.
        let root = ref_chain(MAX_REF_DEPTH + 1);
        let err = dereference(&root, &start, &mut BTreeSet::new()).unwrap_err();
        assert!(matches!(err, ComposeError::DepthExceeded(_)));
    }

    #[test]
    fn dereference_rejects_external_ref() {
        let root = json!({});
        let schema = json!({"$ref": "https://example.com/schema.json"});
        let mut seen = BTreeSet::new();
        let err = dereference(&root, &schema, &mut seen).unwrap_err();
        assert!(matches!(err, ComposeError::ExternalRef(_)));
    }

    #[test]
    fn dereference_unresolved_pointer() {
        let root = json!({"$defs": {}});
        let schema = json!({"$ref": "#/$defs/Missing"});
        let mut seen = BTreeSet::new();
        let err = dereference(&root, &schema, &mut seen).unwrap_err();
        assert!(matches!(err, ComposeError::UnresolvedRef(_)));
    }

    #[test]
    fn dereference_preserves_sibling_keys() {
        let root = json!({"$defs": {"Base": {"type": "string", "minLength": 1}}});
        let schema = json!({"$ref": "#/$defs/Base", "maxLength": 10});
        let mut seen = BTreeSet::new();
        let resolved = dereference(&root, &schema, &mut seen).unwrap();
        assert_eq!(resolved["type"], json!("string"));
        assert_eq!(resolved["minLength"], json!(1));
        assert_eq!(resolved["maxLength"], json!(10));
    }

    #[test]
    fn merge_all_of_unions_required_and_properties() {
        let root = json!({});
        let schemas = vec![
            json!({"type": "object", "properties": {"a": {"type": "string"}}, "required": ["a"]}),
            json!({"properties": {"b": {"type": "integer"}}, "required": ["b"]}),
        ];
        let merged = merge_all_of(&root, &schemas).unwrap();
        assert_eq!(merged["type"], json!("object"));
        assert!(merged["properties"]["a"].is_object());
        assert!(merged["properties"]["b"].is_object());
        let required = merged["required"].as_array().unwrap();
        assert_eq!(required.len(), 2);
    }

    #[test]
    fn merge_all_of_keeps_first_type() {
        let root = json!({});
        let schemas = vec![json!({"type": "string"}), json!({"type": "integer"})];
        let merged = merge_all_of(&root, &schemas).unwrap();
        assert_eq!(merged["type"], json!("string"));
    }

    #[test]
    fn merge_all_of_resolves_refs_in_branches() {
        let root = json!({"$defs": {"Name": {"type": "string"}}});
        let schemas = vec![json!({"$ref": "#/$defs/Name"}), json!({"minLength": 1})];
        let merged = merge_all_of(&root, &schemas).unwrap();
        assert_eq!(merged["type"], json!("string"));
        assert_eq!(merged["minLength"], json!(1));
    }
}