use std::collections::HashSet;
use serde_json::{Map, Value};
/// Definition names that `has_projection_defs` looks for under `$defs`.
///
/// Finding any one of them is enough for a schema to be treated as containing
/// projection types. Only `FieldMeaning` and `Intent` are subsequently
/// rewritten (see `for_structured_output`); the remaining names serve purely
/// as detection markers here.
const PROJECTION_DEF_NAMES: &[&str] = &[
"FieldMeaning",
"Intent",
"ServiceDef",
"Cardinality",
"ActionDef",
"GuardDef",
"StateDef",
];
fn has_projection_defs(schema: &Value) -> bool {
schema
.get("$defs")
.and_then(|d| d.as_object())
.map(|defs| PROJECTION_DEF_NAMES.iter().any(|n| defs.contains_key(*n)))
.unwrap_or(false)
}
fn close_projection_enum(defs: &mut Map<String, Value>, name: &str) {
if let Some(entry) = defs.get_mut(name) {
let desc = entry.get("description").cloned();
if let Some(branches) = entry
.get("anyOf")
.and_then(|a| a.as_array())
.map(|a| a.to_vec())
{
let closed = if let Some(enum_branch) =
branches.iter().find(|b| b.get("enum").is_some()).cloned()
{
let mut closed = enum_branch;
if let (Some(d), Some(obj)) = (desc, closed.as_object_mut()) {
obj.entry("description").or_insert(d);
}
closed
} else {
let consts: Vec<Value> = branches
.iter()
.filter_map(|b| b.get("const").cloned())
.collect();
if consts.is_empty() {
return;
}
let mut obj = serde_json::Map::new();
obj.insert("type".into(), Value::String("string".into()));
obj.insert("enum".into(), Value::Array(consts));
if let Some(d) = desc {
obj.insert("description".into(), d);
}
Value::Object(obj)
};
*entry = closed;
}
}
}
/// Keyword names that `normalize_node` filters out of the schema while
/// rewriting it: schema metadata (`$schema`, `$id`, `title`, `examples`) and
/// numeric/string validation constraints.
const STRIP_KEYWORDS: &[&str] = &[
"$schema",
"$id",
"title",
"examples",
"minimum",
"maximum",
"multipleOf",
"minLength",
"maxLength",
"pattern",
];
/// `format` values that `normalize_node` lets through unchanged. A `format`
/// keyword holding any other string, or a non-string value, is dropped.
const ALLOWED_FORMATS: &[&str] = &[
"date-time",
"date",
"time",
"duration",
"email",
"hostname",
"uri",
"ipv4",
"ipv6",
"uuid",
];
/// Converts a JSON Schema into a self-contained, normalised form.
///
/// The pipeline is:
/// 1. If `$defs` contains any projection definition, close the open
///    `FieldMeaning` and `Intent` enums via `close_projection_enum`.
/// 2. Collect every entry of `$defs` and `definitions` (the latter overriding
///    the former on a name clash) and inline all references to them.
/// 3. Remove the now-redundant top-level `$defs` / `definitions` sections.
/// 4. Run `normalize_node` over the result.
///
/// The input is consumed and the rewritten schema is returned.
pub fn for_structured_output(schema: Value) -> Value {
    let mut root = schema;

    if has_projection_defs(&root) {
        if let Some(Value::Object(projection_defs)) = root.get_mut("$defs") {
            for enum_name in ["FieldMeaning", "Intent"] {
                close_projection_enum(projection_defs, enum_name);
            }
        }
    }

    // Snapshot the definitions so they can be consulted while `root` is consumed.
    let mut defs: Map<String, Value> = Map::new();
    for section in ["$defs", "definitions"] {
        if let Some(Value::Object(entries)) = root.get(section) {
            for (def_name, def_schema) in entries {
                defs.insert(def_name.clone(), def_schema.clone());
            }
        }
    }

    let mut visited = HashSet::new();
    let mut inlined = resolve_refs(root, &defs, &mut visited);

    if let Value::Object(top_level) = &mut inlined {
        top_level.remove("$defs");
        top_level.remove("definitions");
    }

    normalize_node(inlined)
}
fn resolve_refs(node: Value, defs: &Map<String, Value>, visited: &mut HashSet<String>) -> Value {
match node {
Value::Object(ref obj) if obj.contains_key("$ref") => {
if let Some(ref_str) = obj.get("$ref").and_then(|v| v.as_str()) {
if let Some(name) = parse_ref_name(ref_str) {
if visited.contains(name) {
return serde_json::json!({"type": "object"});
}
if let Some(def) = defs.get(name) {
visited.insert(name.to_string());
let resolved = resolve_refs(def.clone(), defs, visited);
visited.remove(name);
if obj.len() > 1 {
if let Value::Object(mut resolved_obj) = resolved {
for (k, v) in obj {
if k != "$ref" {
resolved_obj.insert(k.clone(), v.clone());
}
}
return Value::Object(resolved_obj);
}
}
return resolved;
}
}
}
let Value::Object(obj) = node else {
unreachable!()
};
let mut new_obj = Map::with_capacity(obj.len());
for (k, v) in obj {
new_obj.insert(k, resolve_refs(v, defs, visited));
}
Value::Object(new_obj)
}
Value::Object(obj) => {
let mut new_obj = Map::with_capacity(obj.len());
for (k, v) in obj {
new_obj.insert(k, resolve_refs(v, defs, visited));
}
Value::Object(new_obj)
}
Value::Array(arr) => {
Value::Array(
arr.into_iter()
.map(|elem| resolve_refs(elem, defs, visited))
.collect(),
)
}
other => other,
}
}
/// Extracts the definition name from a local reference of the form
/// `#/$defs/<name>` or `#/definitions/<name>`.
///
/// Returns `None` for any other reference string. The remainder after the
/// prefix is returned verbatim, without any further decoding.
fn parse_ref_name(ref_str: &str) -> Option<&str> {
    ref_str
        .strip_prefix("#/$defs/")
        .or_else(|| ref_str.strip_prefix("#/definitions/"))
}
/// Recursively normalises a schema node.
///
/// For every schema object:
/// * keywords listed in `STRIP_KEYWORDS` are removed;
/// * `format` is kept only when its value is a string listed in
///   `ALLOWED_FORMATS`;
/// * `"additionalProperties": false` is added when the object has
///   `"type": "object"` and a `properties` keyword, unless
///   `additionalProperties` is already present.
///
/// Maps from user-chosen names to subschemas (`properties` and friends) are
/// walked entry by entry without filtering the names, so a property that
/// happens to be called `title`, `pattern`, `format`, ... is preserved.
/// Values of `enum`, `const` and `default` are instance data rather than
/// schemas and are passed through untouched.
///
/// Arrays are normalised element-wise; scalars are returned as-is.
fn normalize_node(node: Value) -> Value {
    // Keywords whose value maps arbitrary names to subschemas.
    const NAME_TO_SCHEMA_KEYWORDS: &[&str] =
        &["properties", "patternProperties", "$defs", "definitions"];
    // Keywords whose value is literal instance data, not a schema.
    const LITERAL_KEYWORDS: &[&str] = &["enum", "const", "default"];

    match node {
        Value::Object(obj) => {
            let mut new_obj = Map::with_capacity(obj.len());
            for (k, v) in obj {
                if STRIP_KEYWORDS.contains(&k.as_str()) {
                    continue;
                }
                if k == "format" {
                    if let Some(fmt) = v.as_str() {
                        if ALLOWED_FORMATS.contains(&fmt) {
                            new_obj.insert(k, v);
                        }
                    }
                    continue;
                }

                let normalized = if LITERAL_KEYWORDS.contains(&k.as_str()) {
                    v
                } else if NAME_TO_SCHEMA_KEYWORDS.contains(&k.as_str()) {
                    match v {
                        Value::Object(members) => {
                            // Keys here are user-chosen names, not schema keywords:
                            // keep every key and normalise only the subschemas.
                            let mut kept = Map::with_capacity(members.len());
                            for (member_name, subschema) in members {
                                kept.insert(member_name, normalize_node(subschema));
                            }
                            Value::Object(kept)
                        }
                        other => normalize_node(other),
                    }
                } else {
                    normalize_node(v)
                };
                new_obj.insert(k, normalized);
            }

            let is_object_type = new_obj
                .get("type")
                .and_then(|t| t.as_str())
                .map(|t| t == "object")
                .unwrap_or(false);
            let has_properties = new_obj.contains_key("properties");
            if is_object_type && has_properties {
                new_obj
                    .entry("additionalProperties")
                    .or_insert(Value::Bool(false));
            }
            Value::Object(new_obj)
        }
        Value::Array(arr) => Value::Array(arr.into_iter().map(normalize_node).collect()),
        other => other,
    }
}
// Unit tests for the schema normaliser. The two `schema_probe_*` tests pin the
// raw shape that `schemars` generates for the projection enums; the remaining
// tests exercise `for_structured_output` on hand-written schemas.
#[cfg(test)]
mod tests {
use schemars::schema_for;
use serde_json::{json, Value};
use super::*;
// Pins the generated `FieldMeaning` schema: an `anyOf` with at least two
// branches, the first a string `enum` containing "money", the second a plain
// string with no `enum`.
#[test]
fn schema_probe_field_meaning_any_of_shape() {
use ferro_projections::FieldMeaning;
let schema: Value = schema_for!(FieldMeaning).to_value();
let any_of = schema["anyOf"]
.as_array()
.expect("FieldMeaning must emit anyOf");
assert!(
any_of.len() >= 2,
"expected >=2 anyOf branches, got {}",
any_of.len()
);
assert_eq!(any_of[0]["type"], "string");
let variants = any_of[0]["enum"]
.as_array()
.expect("first branch must be closed enum");
assert!(
variants.iter().any(|v| v == "money"),
"known variants must include money"
);
assert_eq!(any_of[1]["type"], "string");
assert!(
any_of[1].get("enum").is_none(),
"second branch must be open string (the Custom escape hatch)"
);
}
// Pins the generated `Intent` schema: an `anyOf` with at least eight branches,
// one `const` branch per known variant, and a final plain-string branch that
// has neither `const` nor `enum`.
#[test]
fn schema_probe_intent_any_of_shape() {
use ferro_projections::Intent;
let schema: Value = schema_for!(Intent).to_value();
let any_of = schema["anyOf"].as_array().expect("Intent must emit anyOf");
assert!(
any_of.len() >= 8,
"expected >=8 anyOf branches (7 const + 1 open), got {}",
any_of.len()
);
let expected_variants = [
"browse",
"focus",
"collect",
"process",
"summarize",
"analyze",
"track",
];
for expected in expected_variants {
let found = any_of
.iter()
.any(|branch| branch.get("const").and_then(|c| c.as_str()) == Some(expected));
assert!(found, "Intent anyOf missing const branch for '{expected}'");
}
let last = any_of.last().expect("anyOf must not be empty");
assert_eq!(last["type"], "string", "last branch must be type string");
assert!(
last.get("const").is_none(),
"last branch must not have const (it is the Custom escape hatch)"
);
assert!(
last.get("enum").is_none(),
"last branch must not have enum (it is the Custom escape hatch)"
);
}
// `$schema`, `title` and a nested `minLength` are removed, while `required`
// and `enum` survive and the root object gains `additionalProperties: false`.
#[test]
fn schema_normalizer_strips_rejected_keywords() {
let input = json!({
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Foo",
"type": "object",
"properties": {
"x": {
"type": "string",
"minLength": 1,
"enum": ["a", "b"]
}
},
"required": ["x"]
});
let out = for_structured_output(input);
assert!(out.get("$schema").is_none(), "$schema must be stripped");
assert!(out.get("title").is_none(), "title must be stripped");
assert_eq!(out["required"], json!(["x"]));
assert!(
out["properties"]["x"].get("minLength").is_none(),
"minLength must be stripped from property"
);
assert_eq!(out["properties"]["x"]["enum"], json!(["a", "b"]));
assert_eq!(out["additionalProperties"], json!(false));
}
// A `$ref` into `$defs` is inlined, `$defs` is removed, and the inlined object
// schema receives `additionalProperties: false`.
#[test]
fn schema_normalizer_resolves_refs() {
let input = json!({
"type": "object",
"properties": {
"sub": { "$ref": "#/$defs/Sub" }
},
"required": ["sub"],
"$defs": {
"Sub": {
"type": "object",
"properties": {
"name": { "type": "string" }
},
"required": ["name"]
}
}
});
let out = for_structured_output(input);
assert!(out.get("$defs").is_none(), "$defs must be removed");
assert!(
out["properties"]["sub"].get("$ref").is_none(),
"$ref must not remain after normalization"
);
assert_eq!(out["properties"]["sub"]["type"], json!("object"));
assert_eq!(
out["properties"]["sub"]["additionalProperties"],
json!(false)
);
}
// An inline string `enum` passes through normalisation with all of its
// variants intact.
#[test]
fn schema_normalizer_preserves_enum() {
let input = json!({
"type": "object",
"properties": {
"status": {
"type": "string",
"enum": ["active", "inactive", "pending"]
}
},
"required": ["status"]
});
let out = for_structured_output(input);
let enum_val = out["properties"]["status"]["enum"]
.as_array()
.expect("enum must survive normalization");
assert_eq!(enum_val.len(), 3, "all enum variants must be preserved");
assert!(enum_val.iter().any(|v| v == "active"));
assert!(enum_val.iter().any(|v| v == "inactive"));
assert!(enum_val.iter().any(|v| v == "pending"));
}
// A `FieldMeaning` definition shaped as `anyOf[enum, open string]` collapses to
// the closed string enum once referenced and inlined.
#[test]
fn closes_field_meaning_enum() {
let input = json!({
"type": "object",
"properties": {
"meaning": { "$ref": "#/$defs/FieldMeaning" }
},
"required": ["meaning"],
"$defs": {
"FieldMeaning": {
"description": "Semantic field meaning.",
"anyOf": [
{ "type": "string", "enum": ["money", "status"] },
{ "type": "string" }
]
}
}
});
let out = for_structured_output(input);
assert!(out.get("$defs").is_none(), "$defs must be removed");
let meaning = &out["properties"]["meaning"];
assert!(
meaning.get("anyOf").is_none(),
"anyOf must be gone after closing; got meaning: {meaning:?}"
);
assert_eq!(meaning["type"], json!("string"));
let enum_val = meaning["enum"]
.as_array()
.expect("closed enum must have enum key");
assert!(enum_val.iter().any(|v| v == "money"));
assert!(enum_val.iter().any(|v| v == "status"));
assert_eq!(enum_val.len(), 2);
}
// A definition with the same `anyOf` shape but a name outside the projection
// list (`MyStatus`) keeps its `anyOf` untouched.
#[test]
fn non_projection_schema_not_closed() {
let input = json!({
"type": "object",
"properties": {
"status": { "$ref": "#/$defs/MyStatus" }
},
"required": ["status"],
"$defs": {
"MyStatus": {
"anyOf": [
{ "type": "string", "enum": ["active", "inactive"] },
{ "type": "string" }
]
}
}
});
let out = for_structured_output(input);
let status_schema = &out["properties"]["status"];
assert!(
status_schema.get("anyOf").is_some(),
"non-projection anyOf must survive; got status schema: {status_schema:?}"
);
}
// An `Intent` definition expressed as per-variant `const` branches plus an open
// string branch collapses to a string enum holding only the `const` values.
#[test]
fn closes_intent_enum_const_branch_style() {
let input = json!({
"type": "object",
"properties": {
"intent": { "$ref": "#/$defs/Intent" }
},
"required": ["intent"],
"$defs": {
"Intent": {
"description": "Structural intent.",
"anyOf": [
{ "type": "string", "const": "browse", "description": "Browse intent." },
{ "type": "string", "const": "focus", "description": "Focus intent." },
{ "type": "string" }
]
}
}
});
let out = for_structured_output(input);
let intent = &out["properties"]["intent"];
assert!(
intent.get("anyOf").is_none(),
"anyOf must be gone after closing; got: {intent:?}"
);
assert_eq!(intent["type"], json!("string"));
let enum_val = intent["enum"]
.as_array()
.expect("closed enum must have enum key");
assert!(enum_val.iter().any(|v| v == "browse"));
assert!(enum_val.iter().any(|v| v == "focus"));
assert_eq!(enum_val.len(), 2, "only const values; open branch dropped");
}
// `additionalProperties: false` is added to the root object (which has
// `properties`) but not to an `anyOf` node that has none.
#[test]
fn schema_normalizer_skips_additional_properties_on_anyof() {
let input = json!({
"type": "object",
"properties": {
"val": {
"anyOf": [
{ "type": "string" },
{ "type": "integer" }
]
}
},
"required": ["val"]
});
let out = for_structured_output(input);
let val_schema = &out["properties"]["val"];
assert!(
val_schema.get("additionalProperties").is_none(),
"anyOf node without properties must not get additionalProperties:false"
);
assert_eq!(out["additionalProperties"], json!(false));
}
}