dirge-agent 0.7.4

Minimalistic coding agent written in Rust, optimized for memory footprint and performance
//! Structured validation-error formatting. Split out of
//! `agent/agent_loop/tool_input_repair.rs` (dirge-4y4l stage 10b):
//! turns a validation failure into a model-readable retry hint
//! (Expected / Got / Try).

use serde_json::Value;

use super::validate::parse_json_pointer;

/// Render a validation failure as a four-line retry hint for the model.
///
/// The lines are, in order: every message in `errors` joined with `"; "`,
/// the schema slice the first resolvable error points at, the serialized
/// `args` (cut down once the JSON exceeds 200 bytes), and one concrete
/// suggestion.
///
/// Layout:
/// ```text
/// Tool input rejected: <plain English summary>
/// Expected: <schema slice>
/// Got:      <truncated value>
/// Try:      <one concrete hint>
/// ```
pub fn format_structured_error(schema: &Value, args: &Value, errors: &[String]) -> String {
    /// Serialized arguments longer than this many bytes are shortened.
    const MAX_GOT_LEN: usize = 200;

    // A serialization failure degrades to an empty `Got:` line rather
    // than aborting the whole hint.
    let serialized = serde_json::to_string(args).unwrap_or_default();
    let shown = if serialized.len() > MAX_GOT_LEN {
        crate::text::head(&serialized, MAX_GOT_LEN).to_string()
    } else {
        serialized
    };

    let expected = extract_schema_hint(schema, errors);
    // Closed-set (enum) violations get the precise hint — valid values
    // plus the nearest match; everything else gets the generic one.
    let advice = enum_hint(schema, args, errors).unwrap_or_else(|| build_concrete_hint(errors));

    [
        format!("Tool input rejected: {}", errors.join("; ")),
        format!("Expected: {expected}"),
        format!("Got:      {shown}"),
        format!("Try:      {advice}"),
    ]
    .join("\n")
}

/// When a validation error is a closed-set (enum) violation, build a
/// hint listing the valid values and — if the offending value is a
/// string — the nearest one.
///
/// Every entry of `errors` is inspected in order. An entry is skipped
/// when it does not start with `at /`, when its path cannot be resolved
/// in `schema`, or when the schema node at that path has no non-empty
/// `enum` array. The first entry that survives those checks produces
/// the hint. Returns `None` when no entry does, so the caller falls
/// back to the generic hint.
fn enum_hint(schema: &Value, args: &Value, errors: &[String]) -> Option<String> {
    for err in errors {
        // Each miss below must `continue`, not `?`: bailing out of the
        // whole function would hide an enum violation that is reported
        // after some unrelated error in the same batch.
        let Some(path_start) = err.strip_prefix("at /") else {
            continue;
        };
        let path = path_start.split(':').next().unwrap_or(path_start).trim();
        let pointer = format!("/{path}");
        let parts = parse_json_pointer(&pointer);
        let Some(prop_schema) = navigate_schema(schema, &parts) else {
            continue;
        };
        let Some(variants) = prop_schema.get("enum").and_then(|v| v.as_array()) else {
            continue;
        };

        // Strings are shown bare; any other JSON value keeps its JSON
        // rendering.
        let valid: Vec<String> = variants
            .iter()
            .map(|v| match v {
                Value::String(s) => s.clone(),
                other => other.to_string(),
            })
            .collect();
        if valid.is_empty() {
            continue;
        }

        // The offending value, read back from the args at the same path.
        let got = args.pointer(&pointer);
        let mut hint = format!("Valid values: {}", valid.join(", "));
        if let Some(Value::String(bad)) = got
            && let Some(near) = crate::agent::agent_loop::suggest::closest(bad, &valid)
        {
            hint.push_str(&format!(". Did you mean `{near}`?"));
        }
        return Some(hint);
    }
    None
}

/// Serialize the schema node that the first resolvable error points at.
///
/// Errors are scanned in order; one counts as resolvable when it starts
/// with `at /` and the path up to the first `:` can be walked through
/// `schema`. Yields `"(see tool schema)"` when no error resolves, and
/// `"(schema unavailable)"` when the located node fails to serialize.
fn extract_schema_hint(schema: &Value, errors: &[String]) -> String {
    let located = errors.iter().find_map(|message| {
        let rest = message.strip_prefix("at /")?;
        let path = rest.split(':').next().unwrap_or(rest).trim();
        let segments = parse_json_pointer(&format!("/{path}"));
        navigate_schema(schema, &segments)
    });

    match located {
        Some(node) => {
            serde_json::to_string(node).unwrap_or_else(|_| "(schema unavailable)".into())
        }
        None => "(see tool schema)".into(),
    }
}

/// Walk a JSON Schema along a parsed JSON Pointer path. Each path
/// segment is either an object property (looked up via `properties`)
/// or a numeric array index (descended via `items`). Returns the
/// schema node at the requested path, or `None` if any segment can't
/// be resolved.
///
/// A segment that parses as a number is treated as an array index
/// only when the current node actually has `items`; otherwise it is
/// looked up as a property name, so an object property whose name is
/// all digits (e.g. `"0"`) stays reachable.
///
/// Tested via `navigate_schema_descends_into_array_items` —
/// a `/edits/0/path` style pointer reaches the per-item `path`
/// schema rather than falling back to the default "(see tool
/// schema)" hint.
pub(super) fn navigate_schema<'a>(schema: &'a Value, parts: &[String]) -> Option<&'a Value> {
    let mut current = schema;
    for part in parts {
        // Numeric index — if the parent schema describes an array,
        // descend into its `items`.
        let item_schema = if part.parse::<usize>().is_ok() {
            current.get("items")
        } else {
            None
        };
        current = match item_schema {
            Some(items) => items,
            // Named property, or a numeric-looking name on a node
            // that has no `items`.
            None => current.get("properties")?.get(part)?,
        };
    }
    Some(current)
}

/// Pick one generic retry suggestion by keyword-matching the errors.
///
/// Messages are examined in order, case-insensitively. The first message
/// that mentions `null`, `array` (refined by `string` / `object`), or
/// `missing` decides the hint; a message matching none of these is
/// skipped. When nothing matches, a catch-all suggestion is returned.
pub(super) fn build_concrete_hint(errors: &[String]) -> String {
    let advice_for = |message: &str| -> Option<&'static str> {
        let text = message.to_lowercase();
        let mentions = |needle: &str| text.contains(needle);

        // `null` outranks every array rule.
        if mentions("null") {
            return Some("Remove the null value — the field is not required");
        }
        if mentions("array") {
            let advice = if mentions("string") {
                "Wrap the value in square brackets to make it an array"
            } else if mentions("object") {
                "Replace {} with [] (empty array)"
            } else {
                "The value should be an array, e.g. wrap it in square brackets"
            };
            return Some(advice);
        }
        mentions("missing").then_some("Make sure all required fields are present")
    };

    errors
        .iter()
        .find_map(|message| advice_for(message))
        .unwrap_or("Check the tool schema and retry with valid arguments")
        .to_owned()
}

/// Report whether `key` is one of the field names treated as holding a
/// filesystem path. The comparison is exact and case-sensitive.
/// Used by Phase 2 (markdown auto-link unwrap).
pub fn is_path_field_name(key: &str) -> bool {
    const PATH_LIKE_KEYS: [&str; 5] = ["path", "file_path", "filename", "paths", "dir"];
    PATH_LIKE_KEYS.contains(&key)
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Object schema whose single property `mode` is a closed set of
    /// three strings.
    fn schema_with_enum() -> Value {
        json!({
            "type": "object",
            "properties": {
                "mode": { "type": "string", "enum": ["read", "write", "append"] }
            }
        })
    }

    /// Run the formatter on a single validation message.
    fn render(schema: &Value, args: &Value, message: &str) -> String {
        format_structured_error(schema, args, &[message.to_string()])
    }

    #[test]
    fn enum_violation_lists_valid_values_and_nearest() {
        // Shape of the jsonschema enum error the dispatcher passes in.
        let rendered = render(
            &schema_with_enum(),
            &json!({ "mode": "writ" }),
            "at /mode: \"writ\" is not one of [...]",
        );
        assert!(
            rendered.contains("Valid values: read, write, append"),
            "{rendered}"
        );
        assert!(rendered.contains("Did you mean `write`?"), "{rendered}");
    }

    #[test]
    fn enum_without_close_match_still_lists_values() {
        let rendered = render(
            &schema_with_enum(),
            &json!({ "mode": "zzzzzz" }),
            "at /mode: \"zzzzzz\" is not one of [...]",
        );
        assert!(
            rendered.contains("Valid values: read, write, append"),
            "{rendered}"
        );
        assert!(
            !rendered.contains("Did you mean"),
            "no near match → no guess: {rendered}"
        );
    }

    #[test]
    fn non_enum_error_uses_generic_hint() {
        // The property is literally named `items`; it has no `enum`.
        let array_schema = json!({
            "type": "object",
            "properties": { "items": { "type": "array" } }
        });
        let rendered = render(
            &array_schema,
            &json!({ "items": "x" }),
            "at /items: \"x\" is not of type array",
        );
        assert!(!rendered.contains("Valid values:"), "{rendered}");
        assert!(
            rendered.contains("array"),
            "falls back to the generic array hint: {rendered}"
        );
    }
}