// forge-guardrails 0.1.2
//
// Foundation types for an LLM-agent workflow framework
// Documentation
use serde_json::{json, Map, Value};

/// Schema version identifier for the `forge-dataset-capture` format, revision `v1`.
pub(crate) const CAPTURE_SCHEMA_VERSION: &str = "forge-dataset-capture/v1";
/// Schema version identifier for the `toolcall-verifier-training` format, revision `v1`.
pub(crate) const TRAINING_SCHEMA_VERSION: &str = "toolcall-verifier-training/v1";
/// Revision `v1` of the `toolcall-verifier-input` schema identifier.
///
/// NOTE(review): presumably retained so older records can still be recognised —
/// confirm against callers, which are not visible here.
pub(crate) const TRAINING_INPUT_SCHEMA_VERSION_V1: &str = "toolcall-verifier-input/v1";
/// Revision `v2` of the `toolcall-verifier-input` schema identifier.
pub(crate) const TRAINING_INPUT_SCHEMA_VERSION: &str = "toolcall-verifier-input/v2";
/// Label strings accepted by [`is_allowed_label`].
pub(crate) const ALLOWED_LABELS: &[&str] = &[
    "valid",
    "wrong_tool_semantic",
    "wrong_arguments_semantic",
    "tool_not_needed",
    "needs_clarification",
];
/// Label strings accepted by [`is_training_label`]: every entry of
/// [`ALLOWED_LABELS`] plus `deterministic_invalid`.
pub(crate) const TRAINING_LABELS: &[&str] = &[
    "valid",
    "wrong_tool_semantic",
    "wrong_arguments_semantic",
    "tool_not_needed",
    "needs_clarification",
    "deterministic_invalid",
];

/// Reports whether `label` exactly matches one of the entries in `ALLOWED_LABELS`.
///
/// The comparison is a plain string equality check, so it is case-sensitive.
pub(crate) fn is_allowed_label(label: &str) -> bool {
    ALLOWED_LABELS.iter().any(|candidate| *candidate == label)
}

/// Reports whether `label` exactly matches one of the entries in `TRAINING_LABELS`.
///
/// The comparison is a plain string equality check, so it is case-sensitive.
pub(crate) fn is_training_label(label: &str) -> bool {
    TRAINING_LABELS.iter().any(|candidate| *candidate == label)
}

pub(crate) fn capture_candidate_call(name: &str, arguments: Value) -> Value {
    json!({
        "name": name,
        "arguments": arguments,
    })
}

/// Checks a candidate tool call against the list of available tools.
///
/// The call must carry a string `name` and an object `arguments`; the name
/// must match a tool in `available_tools`, and the arguments must then pass
/// `validate_arguments_against_tool` for that tool.
///
/// # Errors
///
/// Returns a human-readable message when `name` is not a string, when
/// `arguments` is not an object, when no tool has that name, or when the
/// argument validation itself fails.
pub(crate) fn validate_candidate_call(
    available_tools: &Value,
    candidate_call: &Value,
) -> Result<(), String> {
    let Some(name) = candidate_call.get("name").and_then(Value::as_str) else {
        return Err("candidate_call.name must be a string".to_string());
    };
    let Some(arguments) = candidate_call.get("arguments").and_then(Value::as_object) else {
        return Err("candidate_call.arguments must be an object".to_string());
    };

    match tool_by_name(available_tools, name) {
        Some(tool) => validate_arguments_against_tool(tool, arguments),
        None => Err(format!(
            "candidate_call.name references unknown tool '{name}'"
        )),
    }
}

/// Finds the first entry of `available_tools` whose `"name"` field is the string `name`.
///
/// Returns `None` when `available_tools` is not a JSON array or when no entry
/// matches. Entries without a string `"name"` are skipped.
pub(crate) fn tool_by_name<'a>(available_tools: &'a Value, name: &str) -> Option<&'a Value> {
    let tools = available_tools.as_array()?;
    for tool in tools {
        if tool.get("name").and_then(Value::as_str) == Some(name) {
            return Some(tool);
        }
    }
    None
}

pub(crate) fn default_arguments_for_tool(tool: &Value) -> Value {
    let mut arguments = Map::new();
    let Some(properties) = parameters(tool)
        .and_then(|params| params.get("properties"))
        .and_then(Value::as_object)
    else {
        return Value::Object(arguments);
    };

    for (name, schema) in properties {
        arguments.insert(name.clone(), default_value_for_property(name, schema));
    }
    Value::Object(arguments)
}

/// Returns a copy of `original` in which the first argument (in the map's
/// iteration order) has been replaced by `mutated_value`.
///
/// When `original` is not an object, or is an empty object, the tool's default
/// arguments are used as the starting point instead. Returns `None` when there
/// is still nothing to mutate, i.e. the tool declares no properties either.
pub(crate) fn mutated_arguments_for_tool(tool: &Value, original: &Value) -> Option<Value> {
    let mut arguments = match original.as_object() {
        Some(existing) if !existing.is_empty() => existing.clone(),
        // Nothing usable was supplied: start from the tool's placeholder arguments.
        _ => match default_arguments_for_tool(tool) {
            Value::Object(defaults) => defaults,
            _ => Map::new(),
        },
    };

    {
        let (key, slot) = arguments.iter_mut().next()?;
        let replacement = mutated_value(key, slot);
        *slot = replacement;
    }
    Some(Value::Object(arguments))
}

/// Extracts a JSON object from free-form text such as an LLM reply.
///
/// The trimmed text is first parsed as a whole. If that does not yield an
/// object, the span from the first `{` to the last `}` is parsed instead.
///
/// # Errors
///
/// Returns a message when the text has no `{` or no `}`, when the last `}`
/// precedes the first `{`, when the span is not valid JSON, or when it parses
/// to something other than an object.
pub(crate) fn parse_json_object_from_text(text: &str) -> Result<Value, String> {
    // Fast path: the whole (trimmed) text is already a JSON object.
    if let Ok(whole @ Value::Object(_)) = serde_json::from_str::<Value>(text.trim()) {
        return Ok(whole);
    }

    let Some(start) = text.find('{') else {
        return Err("LLM response did not contain a JSON object".to_string());
    };
    let Some(end) = text.rfind('}') else {
        return Err("LLM response did not contain a complete JSON object".to_string());
    };
    if start > end {
        return Err("LLM response JSON object bounds are invalid".to_string());
    }

    // Both delimiters are single-byte ASCII, so the inclusive slice is on char boundaries.
    let parsed = serde_json::from_str::<Value>(&text[start..=end])
        .map_err(|err| format!("failed to parse LLM JSON response: {err}"))?;
    if parsed.is_object() {
        Ok(parsed)
    } else {
        Err("LLM response JSON was not an object".to_string())
    }
}

/// Validates `arguments` against the `parameters` schema of `tool`.
///
/// Two checks are performed, in this order:
/// 1. every string entry of `parameters.required` must be present as a key in
///    `arguments` (non-string entries of `required` are skipped);
/// 2. every argument whose key appears in `parameters.properties` must match
///    that property's declared `type` (see `validate_value_type`).
///
/// Arguments with no matching property are accepted as-is, and a missing or
/// non-object `properties` means no type checks are run.
///
/// # Errors
///
/// Returns a message when `tool.parameters` is missing or not an object, when
/// a required key is absent, or when an argument has the wrong JSON type.
fn validate_arguments_against_tool(
    tool: &Value,
    arguments: &Map<String, Value>,
) -> Result<(), String> {
    let params = parameters(tool).ok_or_else(|| "tool.parameters must be an object".to_string())?;

    if let Some(required) = params.get("required").and_then(Value::as_array) {
        let missing = required
            .iter()
            .filter_map(Value::as_str)
            .find(|name| !arguments.contains_key(*name));
        if let Some(name) = missing {
            return Err(format!(
                "candidate_call.arguments missing required key '{name}'"
            ));
        }
    }

    // Borrow the property schemas instead of cloning the whole map: they are only read.
    let Some(properties) = params.get("properties").and_then(Value::as_object) else {
        return Ok(());
    };
    for (key, value) in arguments {
        if let Some(schema) = properties.get(key) {
            validate_value_type(key, value, schema)?;
        }
    }
    Ok(())
}

/// Returns the tool's `"parameters"` value, but only when it is a JSON object.
fn parameters(tool: &Value) -> Option<&Value> {
    match tool.get("parameters") {
        Some(params) if params.is_object() => Some(params),
        _ => None,
    }
}

/// Checks that `value` has the JSON type named by `schema["type"]`.
///
/// Recognised type names are `string`, `integer`, `number`, `boolean`, `array`
/// and `object`. A schema without a string `type`, or with any other type
/// name, accepts every value.
///
/// # Errors
///
/// Returns a message naming `key` and the expected type on a mismatch.
fn validate_value_type(key: &str, value: &Value, schema: &Value) -> Result<(), String> {
    let kind = match schema.get("type").and_then(Value::as_str) {
        Some(kind) => kind,
        None => return Ok(()),
    };

    let matches_kind = match kind {
        "string" => value.is_string(),
        // Integers are numbers representable as either i64 or u64.
        "integer" => value.is_i64() || value.is_u64(),
        "number" => value.as_f64().is_some(),
        "boolean" => value.is_boolean(),
        "array" => value.is_array(),
        "object" => value.is_object(),
        _ => true,
    };

    if !matches_kind {
        return Err(format!("candidate_call.arguments.{key} must be {kind}"));
    }
    Ok(())
}

fn default_value_for_property(name: &str, schema: &Value) -> Value {
    match schema.get("type").and_then(Value::as_str) {
        Some("string") => Value::String(default_string(name)),
        Some("integer") => json!(default_integer(name)),
        Some("number") => json!(default_integer(name) as f64),
        Some("boolean") => Value::Bool(true),
        Some("array") => Value::Array(default_array(name)),
        Some("object") => Value::Object(Map::new()),
        _ => Value::String(default_string(name)),
    }
}

/// Returns the placeholder string for the property `name`.
///
/// Names listed in the table below get a dedicated sample; every other name
/// gets `"test-value"`.
fn default_string(name: &str) -> String {
    const KNOWN_DEFAULTS: &[(&str, &str)] = &[
        ("date", "2026-06-05"),
        ("path", "docs/README.md"),
        ("product_id", "SKU-HEADPHONES-1"),
        ("ticket_id", "TCK-1001"),
        ("slot_id", "slot-001"),
        ("hold_id", "hold-001"),
        ("count", "0010"),
        ("scenario", "basic_2step"),
        ("error_code", "TOOL_CALL_REJECTED"),
        ("summary", "Forge workflow completed."),
        ("content", "Fetched 0010 records."),
        ("query", "project documentation"),
        ("glob", "docs/*.md"),
        ("note", "Reviewed by dataset stub."),
        ("reason", "Requires human follow-up."),
    ];

    KNOWN_DEFAULTS
        .iter()
        .find(|(field, _)| *field == name)
        .map_or("test-value", |(_, sample)| *sample)
        .to_string()
}

/// Returns the placeholder integer for the property `name`.
///
/// `"duration_minutes"` yields 30; every other name, `"quantity"` included,
/// yields 1.
fn default_integer(name: &str) -> i64 {
    if name == "duration_minutes" {
        30
    } else {
        1
    }
}

/// Returns the placeholder array contents for the property `name`.
///
/// `"product_ids"` yields two sample SKU strings; every other name yields a
/// single `"test-value"` string.
fn default_array(name: &str) -> Vec<Value> {
    let samples: &[&str] = match name {
        "product_ids" => &["SKU-HEADPHONES-1", "SKU-DOCK-1"],
        _ => &["test-value"],
    };
    samples
        .iter()
        .map(|sample| Value::String((*sample).to_string()))
        .collect()
}

fn mutated_value(name: &str, value: &Value) -> Value {
    match value {
        Value::String(_) => Value::String(format!("wrong-{name}-value")),
        Value::Number(_) => json!(9999),
        Value::Bool(current) => Value::Bool(!current),
        Value::Array(_) => Value::Array(vec![Value::String(format!("wrong-{name}-value"))]),
        Value::Object(_) | Value::Null => Value::String(format!("wrong-{name}-value")),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Tool catalogue shared by the tests: one search tool and one cart tool.
    fn tools() -> Value {
        let search_products = json!({
            "name": "search_products",
            "description": "Search products.",
            "parameters": {
                "type": "object",
                "properties": {"query": {"type": "string"}},
                "required": ["query"]
            }
        });
        let add_to_cart = json!({
            "name": "add_to_cart",
            "description": "Add product to cart.",
            "parameters": {
                "type": "object",
                "properties": {
                    "product_id": {"type": "string"},
                    "quantity": {"type": "integer"}
                },
                "required": ["product_id", "quantity"]
            }
        });
        Value::Array(vec![search_products, add_to_cart])
    }

    /// Wraps `arguments` in a candidate call targeting `add_to_cart`.
    fn add_to_cart_call(arguments: Value) -> Value {
        json!({"name": "add_to_cart", "arguments": arguments})
    }

    #[test]
    fn validates_known_tool_and_required_arguments() {
        let call = add_to_cart_call(json!({"product_id": "SKU-HEADPHONES-1", "quantity": 1}));
        validate_candidate_call(&tools(), &call).expect("valid");
    }

    #[test]
    fn rejects_missing_required_argument() {
        // `quantity` is required by the schema but deliberately left out.
        let call = add_to_cart_call(json!({"product_id": "SKU-HEADPHONES-1"}));
        let message = validate_candidate_call(&tools(), &call).expect_err("invalid");
        assert!(message.contains("quantity"));
    }

    #[test]
    fn parses_json_object_from_wrapped_text() {
        let reply = "Here is the JSON: {\"accepted\":true}";
        let parsed = parse_json_object_from_text(reply).expect("json");
        assert_eq!(parsed.get("accepted"), Some(&Value::Bool(true)));
    }

    #[test]
    fn alternative_argument_helpers_preserve_schema_shape() {
        let catalogue = tools();
        let cart_tool = tool_by_name(&catalogue, "add_to_cart").expect("tool");

        // Placeholder arguments must satisfy the tool's own schema.
        let defaults = default_arguments_for_tool(cart_tool);
        validate_candidate_call(&catalogue, &add_to_cart_call(defaults))
            .expect("default args valid");

        // A mutated copy changes content but must remain schema-valid.
        let original = json!({"product_id": "SKU-HEADPHONES-1", "quantity": 1});
        let mutated = mutated_arguments_for_tool(cart_tool, &original).expect("mutated");
        validate_candidate_call(&catalogue, &add_to_cart_call(mutated))
            .expect("mutated args still schema-valid");
    }
}