// apcore_cli/ref_resolver.rs
//
// apcore-cli — JSON Schema $ref inliner.
// Protocol spec: FE-08 (resolve_refs)
3
4use serde_json::{Map, Value};
5use std::collections::HashSet;
6use thiserror::Error;
7
/// Maximum recursion depth for `$ref` resolution.
///
/// `resolve_refs` takes its limit as an explicit `max_depth` argument; this
/// constant is presumably the value callers are expected to pass — confirm
/// against the call sites.
pub const MAX_REF_DEPTH: usize = 32;
10
11// ---------------------------------------------------------------------------
12// Error type
13// ---------------------------------------------------------------------------
14
/// Errors produced during `$ref` resolution.
///
/// Every variant carries the `module_id` that was passed to `resolve_refs`,
/// so the rendered message identifies the module whose schema failed.
#[derive(Debug, Error)]
pub enum RefResolverError {
    /// A `$ref` target could not be found in the schema's definition table
    /// (`$defs`, or `definitions` when `$defs` is absent) (exit 45).
    ///
    /// `reference` holds the `$ref` string exactly as it appeared in the schema.
    #[error("unresolvable $ref '{reference}' in module '{module_id}' (exit 45)")]
    Unresolvable {
        reference: String,
        module_id: String,
    },

    /// A circular reference chain was detected (exit 48).
    #[error("circular $ref detected in module '{module_id}' (exit 48)")]
    Circular { module_id: String },

    /// The maximum recursion depth was exceeded.
    ///
    /// `max_depth` is the limit that was in force when the error was raised.
    #[error("$ref resolution exceeded max depth {max_depth} in module '{module_id}'")]
    MaxDepthExceeded { max_depth: usize, module_id: String },
}
33
34// ---------------------------------------------------------------------------
35// resolve_refs
36// ---------------------------------------------------------------------------
37
38/// Inline all `$ref` pointers in a JSON Schema value.
39///
40/// Resolves `$ref` values by looking them up in `schema["$defs"]` and
41/// substituting the referenced schema in-place. Handles nested schemas
42/// recursively up to `max_depth`.
43///
44/// # Arguments
45/// * `schema`    — JSON Schema value (deep-copy is used internally)
46/// * `max_depth` — maximum recursion depth before raising `MaxDepthExceeded`
47/// * `module_id` — module identifier for error messages
48///
49/// # Errors
50/// * `RefResolverError::Unresolvable` — unknown `$ref` target (exit 45)
51/// * `RefResolverError::Circular`     — circular reference (exit 48)
52/// * `RefResolverError::MaxDepthExceeded` — depth limit reached
53pub fn resolve_refs(
54    schema: &Value,
55    max_depth: usize,
56    module_id: &str,
57) -> Result<Value, RefResolverError> {
58    // Deep-copy; do not modify the caller's value.
59    let copy = schema.clone();
60
61    // Extract $defs / definitions ($defs takes precedence).
62    let defs: Map<String, Value> = copy
63        .get("$defs")
64        .or_else(|| copy.get("definitions"))
65        .and_then(|v| v.as_object())
66        .cloned()
67        .unwrap_or_default();
68
69    let mut visiting: HashSet<String> = HashSet::new();
70    let resolved = resolve_node(copy, &defs, 0, max_depth, &mut visiting, module_id)?;
71
72    // Strip definition keys from result.
73    let mut result = resolved;
74    if let Some(obj) = result.as_object_mut() {
75        obj.remove("$defs");
76        obj.remove("definitions");
77    }
78    Ok(result)
79}
80
81// ---------------------------------------------------------------------------
82// Composition helpers
83// ---------------------------------------------------------------------------
84
85/// Merge all branches for allOf: union properties (later wins on conflict),
86/// concatenate required arrays.
87fn merge_allof(branches: Vec<Value>) -> Value {
88    let mut merged_props = Map::new();
89    let mut merged_required: Vec<Value> = Vec::new();
90
91    for branch in branches {
92        if let Some(props) = branch.get("properties").and_then(|v| v.as_object()) {
93            for (k, v) in props {
94                merged_props.insert(k.clone(), v.clone());
95            }
96        }
97        if let Some(req) = branch.get("required").and_then(|v| v.as_array()) {
98            merged_required.extend(req.iter().cloned());
99        }
100    }
101
102    let mut result = Map::new();
103    result.insert("properties".to_string(), Value::Object(merged_props));
104    result.insert("required".to_string(), Value::Array(merged_required));
105    Value::Object(result)
106}
107
108/// Compute the intersection of required field sets across branches.
109fn intersect_required_sets(sets: Vec<HashSet<String>>) -> Vec<Value> {
110    if sets.is_empty() {
111        return Vec::new();
112    }
113    let mut iter = sets.into_iter();
114    let first = iter.next().unwrap();
115    iter.fold(first, |acc, set| acc.intersection(&set).cloned().collect())
116        .into_iter()
117        .map(Value::String)
118        .collect()
119}
120
121/// Merge all branches for anyOf/oneOf: union properties, required = intersection.
122fn merge_anyof(branches: Vec<Value>) -> Value {
123    let mut merged_props = Map::new();
124    let mut all_required_sets: Vec<HashSet<String>> = Vec::new();
125
126    for branch in branches {
127        if let Some(props) = branch.get("properties").and_then(|v| v.as_object()) {
128            for (k, v) in props {
129                merged_props.insert(k.clone(), v.clone());
130            }
131        }
132        let set: HashSet<String> = branch
133            .get("required")
134            .and_then(|v| v.as_array())
135            .map(|arr| {
136                arr.iter()
137                    .filter_map(|v| v.as_str().map(str::to_string))
138                    .collect()
139            })
140            .unwrap_or_default();
141        all_required_sets.push(set);
142    }
143
144    let intersection = intersect_required_sets(all_required_sets);
145
146    let mut result = Map::new();
147    result.insert("properties".to_string(), Value::Object(merged_props));
148    result.insert("required".to_string(), Value::Array(intersection));
149    Value::Object(result)
150}
151
152// ---------------------------------------------------------------------------
153// resolve_node (private helper)
154// ---------------------------------------------------------------------------
155
156fn resolve_node(
157    node: Value,
158    defs: &Map<String, Value>,
159    depth: usize,
160    max_depth: usize,
161    visiting: &mut HashSet<String>,
162    module_id: &str,
163) -> Result<Value, RefResolverError> {
164    let obj = match node {
165        Value::Object(map) => map,
166        other => return Ok(other),
167    };
168
169    // Handle $ref substitution.
170    if let Some(ref_val) = obj.get("$ref") {
171        let ref_path = ref_val.as_str().unwrap_or("").to_string();
172
173        if depth >= max_depth {
174            return Err(RefResolverError::MaxDepthExceeded {
175                max_depth,
176                module_id: module_id.to_string(),
177            });
178        }
179
180        if visiting.contains(&ref_path) {
181            return Err(RefResolverError::Circular {
182                module_id: module_id.to_string(),
183            });
184        }
185
186        // Extract key: "#/$defs/Address" → "Address"
187        let key = ref_path.split('/').next_back().unwrap_or("").to_string();
188
189        let def = defs
190            .get(&key)
191            .cloned()
192            .ok_or_else(|| RefResolverError::Unresolvable {
193                reference: ref_path.clone(),
194                module_id: module_id.to_string(),
195            })?;
196
197        visiting.insert(ref_path.clone());
198        let result = resolve_node(def, defs, depth + 1, max_depth, visiting, module_id)?;
199        // Keep ref_path in visiting for the duration of this chain to detect cycles.
200        // It remains in visiting intentionally — siblings go through a fresh chain
201        // because we only remove entries when unwinding past the insertion point.
202        // However, for sibling $refs (two different properties referencing the same def),
203        // we must remove the entry after resolving so they don't block each other.
204        visiting.remove(&ref_path);
205        return Ok(result);
206    }
207
208    // Handle allOf: merge properties (later wins), concatenate required.
209    if obj.contains_key("allOf") {
210        let sub_schemas = obj
211            .get("allOf")
212            .and_then(|v| v.as_array())
213            .cloned()
214            .unwrap_or_default();
215
216        // Resolve each branch first (handles nested $refs).
217        let mut resolved_branches = Vec::with_capacity(sub_schemas.len());
218        for sub in sub_schemas {
219            let resolved_sub = resolve_node(sub, defs, depth + 1, max_depth, visiting, module_id)?;
220            resolved_branches.push(resolved_sub);
221        }
222
223        let merged = merge_allof(resolved_branches);
224        let merged_map = match merged {
225            Value::Object(m) => m,
226            _ => Map::new(),
227        };
228
229        // Carry over non-composition keys from the parent node.
230        let mut result_map = merged_map;
231
232        // Seed parent node's own `properties`/`required` into the merged result
233        // AFTER branch merging — parent properties that are NOT already present
234        // from any branch are inserted here. This matches Python behaviour where
235        // `{properties:{x:...}, allOf:[{properties:{y:...}}]}` preserves both
236        // x and y (branches win on conflict; parent fills gaps).
237        if let Some(parent_props) = obj.get("properties").and_then(|v| v.as_object()) {
238            if let Some(Value::Object(merged_props)) = result_map.get_mut("properties") {
239                for (k, v) in parent_props {
240                    merged_props.entry(k.clone()).or_insert_with(|| v.clone());
241                }
242            }
243        }
244        if let Some(parent_req) = obj.get("required").and_then(|v| v.as_array()) {
245            if let Some(Value::Array(merged_req)) = result_map.get_mut("required") {
246                for item in parent_req {
247                    if !merged_req.contains(item) {
248                        merged_req.push(item.clone());
249                    }
250                }
251            }
252        }
253
254        for (k, v) in &obj {
255            if k != "allOf" && !result_map.contains_key(k) {
256                result_map.insert(k.clone(), v.clone());
257            }
258        }
259        return Ok(Value::Object(result_map));
260    }
261
262    // Handle anyOf / oneOf (same merge logic, intersection of required).
263    for keyword in &["anyOf", "oneOf"] {
264        if obj.contains_key(*keyword) {
265            let sub_schemas = obj
266                .get(*keyword)
267                .and_then(|v| v.as_array())
268                .cloned()
269                .unwrap_or_default();
270
271            let mut resolved_branches = Vec::with_capacity(sub_schemas.len());
272            for sub in sub_schemas {
273                let resolved_sub =
274                    resolve_node(sub, defs, depth + 1, max_depth, visiting, module_id)?;
275                resolved_branches.push(resolved_sub);
276            }
277
278            let merged = merge_anyof(resolved_branches);
279            let merged_map = match merged {
280                Value::Object(m) => m,
281                _ => Map::new(),
282            };
283
284            let mut result_map = merged_map;
285            for (k, v) in &obj {
286                if k != *keyword && !result_map.contains_key(k) {
287                    result_map.insert(k.clone(), v.clone());
288                }
289            }
290            return Ok(Value::Object(result_map));
291        }
292    }
293
294    // Recursively resolve all values in the object map.
295    let mut resolved_map = Map::with_capacity(obj.len());
296    for (k, v) in obj {
297        let resolved_v = resolve_node(v, defs, depth, max_depth, visiting, module_id)?;
298        resolved_map.insert(k, resolved_v);
299    }
300
301    Ok(Value::Object(resolved_map))
302}
303
304// ---------------------------------------------------------------------------
305// Unit tests
306// ---------------------------------------------------------------------------
307
// Unit tests for `resolve_refs`. Every test drives the public entry point
// only; the private helpers are exercised indirectly through it.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_resolve_refs_no_refs_unchanged() {
        // A schema without any $ref must be returned unchanged.
        let schema = json!({
            "type": "object",
            "properties": {
                "name": {"type": "string"}
            }
        });
        let result = resolve_refs(&schema, 32, "test.module");
        assert!(result.is_ok());
        let resolved = result.unwrap();
        assert_eq!(resolved["properties"]["name"]["type"], "string");
    }

    #[test]
    fn test_resolve_refs_simple_ref() {
        // A single $ref must be inlined from $defs.
        let schema = json!({
            "$defs": {
                "MyString": {"type": "string", "description": "A name"}
            },
            "type": "object",
            "properties": {
                "name": {"$ref": "#/$defs/MyString"}
            }
        });
        let result = resolve_refs(&schema, 32, "test.module");
        assert!(result.is_ok());
        let resolved = result.unwrap();
        assert_eq!(resolved["properties"]["name"]["type"], "string");
        assert_eq!(resolved["properties"]["name"]["description"], "A name");
        // $defs must be stripped from result.
        assert!(resolved.get("$defs").is_none());
    }

    #[test]
    fn test_resolve_refs_definitions_key_also_supported() {
        // Some schemas use "definitions" instead of "$defs".
        let schema = json!({
            "definitions": {
                "Addr": {"type": "string"}
            },
            "properties": {
                "city": {"$ref": "#/definitions/Addr"}
            }
        });
        let result = resolve_refs(&schema, 32, "test.module");
        assert!(result.is_ok());
        let resolved = result.unwrap();
        assert_eq!(resolved["properties"]["city"]["type"], "string");
        // "definitions" must be stripped from the result as well.
        assert!(resolved.get("definitions").is_none());
    }

    #[test]
    fn test_resolve_refs_unresolvable_returns_error() {
        // An unknown $ref must yield RefResolverError::Unresolvable.
        let schema = json!({
            "type": "object",
            "properties": {
                "x": {"$ref": "#/$defs/DoesNotExist"}
            }
        });
        let result = resolve_refs(&schema, 32, "test.module");
        assert!(
            matches!(result, Err(RefResolverError::Unresolvable { .. })),
            "expected Unresolvable, got: {result:?}"
        );
    }

    #[test]
    fn test_resolve_refs_circular_returns_error() {
        // A circular $ref chain must yield RefResolverError::Circular or MaxDepthExceeded.
        // Either variant is accepted: the test only requires that the A -> B -> A
        // loop terminates with an error instead of recursing forever.
        let schema = json!({
            "$defs": {
                "A": {"$ref": "#/$defs/B"},
                "B": {"$ref": "#/$defs/A"}
            },
            "properties": {
                "x": {"$ref": "#/$defs/A"}
            }
        });
        let result = resolve_refs(&schema, 32, "test.module");
        assert!(
            matches!(
                result,
                Err(RefResolverError::Circular { .. })
                    | Err(RefResolverError::MaxDepthExceeded { .. })
            ),
            "expected Circular or MaxDepthExceeded, got: {result:?}"
        );
    }

    #[test]
    fn test_resolve_refs_max_depth_exceeded() {
        // max_depth=0 means the first $ref hit immediately fails.
        let schema = json!({
            "$defs": {
                "Inner": {"type": "string"}
            },
            "properties": {
                "x": {"$ref": "#/$defs/Inner"}
            }
        });
        let result = resolve_refs(&schema, 0, "test.module");
        assert!(
            matches!(result, Err(RefResolverError::MaxDepthExceeded { .. })),
            "expected MaxDepthExceeded, got: {result:?}"
        );
    }

    #[test]
    fn test_resolve_refs_nested_defs() {
        // $refs inside nested object properties must all be resolved.
        let schema = json!({
            "$defs": {
                "City": {"type": "string"}
            },
            "properties": {
                "address": {
                    "type": "object",
                    "properties": {
                        "city": {"$ref": "#/$defs/City"}
                    }
                }
            }
        });
        let result = resolve_refs(&schema, 32, "test.module");
        assert!(result.is_ok());
        let resolved = result.unwrap();
        assert_eq!(
            resolved["properties"]["address"]["properties"]["city"]["type"],
            "string"
        );
    }

    #[test]
    fn test_resolve_refs_does_not_mutate_input() {
        // The original schema must not be modified.
        let schema = json!({
            "$defs": {"T": {"type": "integer"}},
            "properties": {"x": {"$ref": "#/$defs/T"}}
        });
        let _ = resolve_refs(&schema, 32, "test.module");
        // Input schema still has $ref (not mutated).
        assert_eq!(schema["properties"]["x"]["$ref"], "#/$defs/T");
    }

    #[test]
    fn test_resolve_refs_sibling_refs_same_def() {
        // Two different properties referencing the same $def must both resolve correctly.
        let schema = json!({
            "$defs": {
                "Str": {"type": "string"}
            },
            "properties": {
                "a": {"$ref": "#/$defs/Str"},
                "b": {"$ref": "#/$defs/Str"}
            }
        });
        let result = resolve_refs(&schema, 32, "test.module");
        assert!(result.is_ok(), "sibling refs failed: {result:?}");
        let resolved = result.unwrap();
        assert_eq!(resolved["properties"]["a"]["type"], "string");
        assert_eq!(resolved["properties"]["b"]["type"], "string");
    }

    // --- Schema composition tests ---

    #[test]
    fn test_allof_merges_properties() {
        // allOf branches with disjoint properties: both properties and both
        // required entries must appear in the flattened result.
        let schema = json!({
            "allOf": [
                {
                    "properties": {"a": {"type": "string"}},
                    "required": ["a"]
                },
                {
                    "properties": {"b": {"type": "integer"}},
                    "required": ["b"]
                }
            ]
        });
        let result = resolve_refs(&schema, 32, "mod").unwrap();
        assert_eq!(result["properties"]["a"]["type"], "string");
        assert_eq!(result["properties"]["b"]["type"], "integer");
        let required: Vec<&str> = result["required"]
            .as_array()
            .unwrap()
            .iter()
            .filter_map(|v| v.as_str())
            .collect();
        assert!(required.contains(&"a"));
        assert!(required.contains(&"b"));
    }

    #[test]
    fn test_allof_later_schema_wins_on_conflict() {
        // Two allOf branches define the same property with different types.
        let schema = json!({
            "allOf": [
                {"properties": {"x": {"type": "string"}}},
                {"properties": {"x": {"type": "integer"}}}
            ]
        });
        let result = resolve_refs(&schema, 32, "mod").unwrap();
        // Later sub-schema wins: x must be integer.
        assert_eq!(result["properties"]["x"]["type"], "integer");
    }

    #[test]
    fn test_allof_copies_non_composition_keys() {
        // Keys that sit next to allOf on the parent node must not be lost.
        let schema = json!({
            "description": "My type",
            "allOf": [
                {"properties": {"a": {"type": "string"}}}
            ]
        });
        let result = resolve_refs(&schema, 32, "mod").unwrap();
        // "description" must survive in the merged result.
        assert_eq!(result["description"], "My type");
    }

    #[test]
    fn test_anyof_unions_properties() {
        // anyOf branches with disjoint properties.
        let schema = json!({
            "anyOf": [
                {"properties": {"a": {"type": "string"}}, "required": ["a"]},
                {"properties": {"b": {"type": "integer"}}, "required": ["b"]}
            ]
        });
        let result = resolve_refs(&schema, 32, "mod").unwrap();
        // Both properties must appear.
        assert!(result["properties"].get("a").is_some());
        assert!(result["properties"].get("b").is_some());
    }

    #[test]
    fn test_anyof_required_is_intersection() {
        // Both branches require "a"; "b" and "c" are each required by one branch only.
        let schema = json!({
            "anyOf": [
                {"properties": {"a": {"type": "string"}, "b": {"type": "string"}}, "required": ["a", "b"]},
                {"properties": {"a": {"type": "string"}, "c": {"type": "string"}}, "required": ["a", "c"]}
            ]
        });
        let result = resolve_refs(&schema, 32, "mod").unwrap();
        let required: Vec<&str> = result["required"]
            .as_array()
            .unwrap()
            .iter()
            .filter_map(|v| v.as_str())
            .collect();
        // Only "a" appears in both branches — it is the intersection.
        assert!(
            required.contains(&"a"),
            "a must be required (in both branches)"
        );
        assert!(
            !required.contains(&"b"),
            "b must not be required (only in first branch)"
        );
        assert!(
            !required.contains(&"c"),
            "c must not be required (only in second branch)"
        );
    }

    #[test]
    fn test_anyof_empty_required_when_no_overlap() {
        // The branches share no required field, so the intersection is empty.
        let schema = json!({
            "anyOf": [
                {"properties": {"a": {"type": "string"}}, "required": ["a"]},
                {"properties": {"b": {"type": "integer"}}, "required": ["b"]}
            ]
        });
        let result = resolve_refs(&schema, 32, "mod").unwrap();
        let required = result["required"].as_array().unwrap();
        assert!(
            required.is_empty(),
            "no fields are required in both branches"
        );
    }

    #[test]
    fn test_oneof_behaves_like_anyof() {
        // oneOf goes through the same merge as anyOf: union of properties,
        // intersection of required.
        let schema = json!({
            "oneOf": [
                {"properties": {"x": {"type": "string"}}, "required": ["x"]},
                {"properties": {"y": {"type": "integer"}}, "required": ["y"]}
            ]
        });
        let result = resolve_refs(&schema, 32, "mod").unwrap();
        assert!(result["properties"].get("x").is_some());
        assert!(result["properties"].get("y").is_some());
        assert!(result["required"].as_array().unwrap().is_empty());
    }

    #[test]
    fn test_allof_with_nested_ref() {
        // allOf sub-schema that itself contains a $ref.
        let schema = json!({
            "$defs": {
                "Base": {"properties": {"id": {"type": "integer"}}, "required": ["id"]}
            },
            "allOf": [
                {"$ref": "#/$defs/Base"},
                {"properties": {"name": {"type": "string"}}}
            ]
        });
        let result = resolve_refs(&schema, 32, "mod").unwrap();
        assert_eq!(result["properties"]["id"]["type"], "integer");
        assert_eq!(result["properties"]["name"]["type"], "string");
        let required: Vec<&str> = result["required"]
            .as_array()
            .unwrap()
            .iter()
            .filter_map(|v| v.as_str())
            .collect();
        assert!(required.contains(&"id"));
    }
}