use std::borrow::Cow;
use devops_models::models::validation::RepairResult;
use serde_json::Value;
pub fn repair_yaml(yaml_content: &str, schema: &Value) -> RepairResult {
let normalized = normalize_yaml(yaml_content);
let mut data: Value = match serde_yaml::from_str(&normalized) {
Ok(v) => v,
Err(e) => {
return RepairResult {
valid: false,
repaired_yaml: yaml_content.to_string(),
errors: vec![format!("YAML parse error: {e}")],
warnings: vec![],
llm_fields: vec![],
summary: format!("Cannot parse YAML: {e}"),
}
}
};
if let Some(obj) = data.as_object_mut() {
fill_defaults(obj, schema);
}
let (deterministic, ambiguous) = categorize_schema_errors(&data, schema);
let mut fix_log = Vec::new();
let mut failed_fixes = Vec::new();
for error in &deterministic {
match error.kind {
SchemaErrorKind::Type => {
if let Some(target_type) = &error.expected_type {
if apply_type_coercion(&mut data, &error.path, target_type) {
fix_log.push(format!(
"Coerced {} to type '{}'",
path_str(&error.path),
target_type
));
} else {
failed_fixes.push(path_str(&error.path));
}
}
}
SchemaErrorKind::AdditionalProperties => {
if let Some(allowed) = &error.allowed_keys {
strip_extra_keys(&mut data, &error.path, allowed);
fix_log.push(format!(
"Removed extra keys at {}",
path_str(&error.path)
));
}
}
SchemaErrorKind::Enum => {
failed_fixes.push(path_str(&error.path));
}
SchemaErrorKind::Required => {}
}
}
let repaired_yaml = match serde_yaml::to_string(&data) {
Ok(s) => s,
Err(e) => {
return RepairResult {
valid: false,
repaired_yaml: yaml_content.to_string(),
errors: vec![format!("Failed to serialize repaired YAML: {e}")],
warnings: fix_log,
llm_fields: vec![],
summary: "Repair failed during serialization".to_string(),
}
}
};
let mut remaining_errors: Vec<String> = ambiguous.iter().map(|e| e.message.clone()).collect();
remaining_errors.extend(failed_fixes.iter().map(|p| format!("Could not auto-fix: {p}")));
let llm_fields: Vec<String> = ambiguous
.iter()
.map(|e| path_str(&e.path))
.chain(failed_fixes)
.collect();
let valid = remaining_errors.is_empty();
let summary = if valid {
format!(
"YAML repaired successfully ({} fix{})",
fix_log.len(),
if fix_log.len() == 1 { "" } else { "es" }
)
} else {
format!(
"{} fix(es) applied, {} issue(s) remaining (need LLM assistance)",
fix_log.len(),
remaining_errors.len()
)
};
RepairResult {
valid,
repaired_yaml,
errors: remaining_errors,
warnings: fix_log,
llm_fields,
summary,
}
}
/// Converts every line ending to `\n` and trims whitespace from both ends.
///
/// Both `\r\n` pairs and lone `\r` characters become a single `\n`.
///
// NOTE(review): trimming the front also removes the first line's indentation;
// confirm callers never pass documents whose top level is uniformly indented.
fn normalize_yaml(content: &str) -> String {
    let mut unified = String::with_capacity(content.len());
    let mut chars = content.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\r' {
            unified.push(ch);
            continue;
        }
        // For CRLF the CR is dropped and the LF is copied on the next turn;
        // a lone CR is replaced by LF.
        if chars.peek() != Some(&'\n') {
            unified.push('\n');
        }
    }
    unified.trim().to_owned()
}
fn fill_defaults(obj: &mut serde_json::Map<String, Value>, schema: &Value) {
let properties = match schema.get("properties").and_then(|p| p.as_object()) {
Some(p) => p,
None => return,
};
for (prop, subschema) in properties {
if !obj.contains_key(prop) {
if let Some(default_val) = subschema.get("default") {
obj.insert(prop.clone(), default_val.clone());
}
} else if let Some(nested_obj) = obj.get_mut(prop).and_then(|v| v.as_object_mut())
&& subschema.get("properties").is_some()
{
fill_defaults(nested_obj, subschema);
}
}
}
/// The schema keyword whose constraint a value violated.
///
/// The enum carries no data, so it is `Copy` and `Eq` in addition to the
/// comparison and cloning traits it already had.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SchemaErrorKind {
    /// The value's JSON type differs from the schema's `type`.
    Type,
    /// The value is not one of the schema's `enum` members.
    Enum,
    /// A key listed under `required` is absent.
    Required,
    /// The object has keys not declared under `properties` while
    /// `additionalProperties` is `false`.
    AdditionalProperties,
}
impl std::fmt::Display for SchemaErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SchemaErrorKind::Type => write!(f, "type"),
SchemaErrorKind::Enum => write!(f, "enum"),
SchemaErrorKind::Required => write!(f, "required"),
SchemaErrorKind::AdditionalProperties => write!(f, "additionalProperties"),
}
}
}
/// A single schema violation recorded while validating a document.
#[derive(Debug, Clone)]
struct SchemaError {
/// Segments from the document root to the value the error is attached to;
/// array positions are stored as decimal strings. Empty for the root.
path: Vec<String>,
/// Which schema keyword was violated.
kind: SchemaErrorKind,
/// Human-readable description; this is what `Display` prints.
message: String,
/// The schema's `type` value; populated for `Type` errors, `None` otherwise.
expected_type: Option<String>,
/// Keys declared under the schema's `properties`; populated for
/// `AdditionalProperties` errors, `None` otherwise.
allowed_keys: Option<Vec<String>>,
}
/// Displays the error as its stored message, without any extra decoration.
impl std::fmt::Display for SchemaError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}
// Marker impl: lets `SchemaError` be used wherever a `std::error::Error` is
// expected, relying on the trait's default methods.
impl std::error::Error for SchemaError {}
/// Validates `data` against `schema` starting from the document root.
///
/// # Returns
/// A pair `(deterministic, ambiguous)`: the first list holds type, enum and
/// unknown-key violations, the second holds missing required fields.
fn categorize_schema_errors(data: &Value, schema: &Value) -> (Vec<SchemaError>, Vec<SchemaError>) {
    let mut deterministic: Vec<SchemaError> = Vec::new();
    let mut ambiguous: Vec<SchemaError> = Vec::new();
    // The walk begins with an empty path, which denotes the root.
    let mut root_path: Vec<String> = Vec::new();

    validate_against_schema(
        data,
        schema,
        &mut root_path,
        &mut deterministic,
        &mut ambiguous,
    );

    (deterministic, ambiguous)
}
/// Recursively checks `data` against the schema keywords this module handles
/// and appends every violation to one of the two output lists.
///
/// Keywords are evaluated in this order: `type`, `enum`, `required`,
/// `additionalProperties`, `properties`, `items`.
///
/// * A `type` mismatch is recorded in `deterministic` and stops all further
///   checks for this value, including recursion into its children.
/// * An `enum` mismatch and unknown keys (when `additionalProperties` is
///   `false` and `properties` is declared) are recorded in `deterministic`.
/// * Each missing `required` key is recorded in `ambiguous`.
///
/// `path` holds the location of `data` within the document; it is extended
/// while visiting a child and restored before returning.
fn validate_against_schema(
    data: &Value,
    schema: &Value,
    path: &mut Vec<String>,
    deterministic: &mut Vec<SchemaError>,
    ambiguous: &mut Vec<SchemaError>,
) {
    // type: a mismatch makes every other keyword meaningless for this value.
    match schema.get("type").and_then(Value::as_str) {
        Some(expected_type) if !value_matches_type(data, expected_type) => {
            let message = format!(
                "{}: expected type '{}', got '{}'",
                path_str(path),
                expected_type,
                json_type_name(data)
            );
            deterministic.push(SchemaError {
                path: path.clone(),
                kind: SchemaErrorKind::Type,
                message,
                expected_type: Some(expected_type.to_string()),
                allowed_keys: None,
            });
            return;
        }
        _ => {}
    }

    // enum
    match schema.get("enum").and_then(Value::as_array) {
        Some(enum_values) if !enum_values.contains(data) => {
            let message = format!("{}: value not in enum {:?}", path_str(path), enum_values);
            deterministic.push(SchemaError {
                path: path.clone(),
                kind: SchemaErrorKind::Enum,
                message,
                expected_type: None,
                allowed_keys: None,
            });
        }
        _ => {}
    }

    // The remaining object keywords all need these two views.
    let object = data.as_object();
    let declared = schema.get("properties").and_then(Value::as_object);

    // required
    let required = schema.get("required").and_then(Value::as_array);
    if let (Some(required), Some(obj)) = (required, object) {
        let missing = required
            .iter()
            .filter_map(Value::as_str)
            .filter(|key| !obj.contains_key(*key));
        for key in missing {
            let message = format!("{}: missing required field '{}'", path_str(path), key);
            ambiguous.push(SchemaError {
                path: path.clone(),
                kind: SchemaErrorKind::Required,
                message,
                expected_type: None,
                allowed_keys: None,
            });
        }
    }

    // additionalProperties: only enforced when it is literally `false` and
    // the schema declares `properties`.
    let forbids_extras = matches!(
        schema.get("additionalProperties"),
        Some(Value::Bool(false))
    );
    if let (true, Some(obj), Some(props)) = (forbids_extras, object, declared) {
        let extra: Vec<&str> = obj
            .keys()
            .filter(|key| !props.contains_key(key.as_str()))
            .map(String::as_str)
            .collect();
        if !extra.is_empty() {
            let message = format!("{}: unknown fields: {}", path_str(path), extra.join(", "));
            deterministic.push(SchemaError {
                path: path.clone(),
                kind: SchemaErrorKind::AdditionalProperties,
                message,
                expected_type: None,
                allowed_keys: Some(props.keys().cloned().collect()),
            });
        }
    }

    // properties: descend into each declared property that is present.
    if let (Some(props), Some(obj)) = (declared, object) {
        for (key, subschema) in props {
            let Some(child) = obj.get(key) else {
                continue;
            };
            path.push(key.clone());
            validate_against_schema(child, subschema, path, deterministic, ambiguous);
            path.pop();
        }
    }

    // items: every array element is checked against the same subschema.
    if let (Some(items_schema), Some(elements)) = (schema.get("items"), data.as_array()) {
        for (index, element) in elements.iter().enumerate() {
            path.push(index.to_string());
            validate_against_schema(element, items_schema, path, deterministic, ambiguous);
            path.pop();
        }
    }
}
/// Replaces the value at `path` with its coerced form, when one exists.
///
/// # Returns
/// `true` if the value was replaced; `false` if `path` does not resolve or
/// `coerce_type` produced no new value.
fn apply_type_coercion(data: &mut Value, path: &[String], target_type: &str) -> bool {
    let Some(slot) = navigate_to_mut(data, path) else {
        return false;
    };

    // Reduce the result to an owned replacement first so the shared borrow of
    // `slot` has ended before the slot is overwritten.
    let replacement = match coerce_type(slot, target_type) {
        Cow::Owned(converted) => Some(converted),
        Cow::Borrowed(_) => None,
    };

    if let Some(converted) = replacement {
        *slot = converted;
        true
    } else {
        false
    }
}
/// Attempts a lossless conversion of `value` to the JSON type `target_type`.
///
/// Returns `Cow::Owned` with the converted value on success and
/// `Cow::Borrowed(value)` when no safe conversion exists, so callers can tell
/// "converted" from "left alone" by the variant.
///
/// Supported conversions (string inputs are trimmed first):
/// * `"integer"`: strings that parse as `i64`; numbers with no fractional
///   part that fit in `i64`. Fractional or out-of-range numbers are left
///   alone rather than truncated.
/// * `"number"`: strings that parse as a finite `f64`.
/// * `"string"`: numbers and booleans, via their textual form.
/// * `"boolean"`: the strings `true`/`yes`/`1`/`on` and `false`/`no`/`0`/`off`
///   (case-insensitive); integers, where non-zero means `true`. Any other
///   string is left alone instead of being turned into `false`.
///
/// Every other combination, including unknown target types, is left alone.
fn coerce_type<'a>(value: &'a Value, target_type: &str) -> Cow<'a, Value> {
    let converted: Option<Value> = match (target_type, value) {
        ("integer", Value::String(text)) => text
            .trim()
            .parse::<i64>()
            .ok()
            .map(|n| Value::Number(n.into())),
        ("integer", Value::Number(number)) => number
            .as_f64()
            // i64::MAX as f64 rounds up to 2^63, which is out of range, hence
            // the strict upper bound. NaN and infinities fail the fract test.
            .filter(|f| f.fract() == 0.0 && *f >= i64::MIN as f64 && *f < i64::MAX as f64)
            .map(|f| Value::Number((f as i64).into())),
        ("number", Value::String(text)) => text
            .trim()
            .parse::<f64>()
            .ok()
            // from_f64 rejects NaN and infinities, which JSON cannot hold.
            .and_then(serde_json::Number::from_f64)
            .map(Value::Number),
        ("string", Value::Number(number)) => Some(Value::String(number.to_string())),
        ("string", Value::Bool(flag)) => Some(Value::String(flag.to_string())),
        ("boolean", Value::String(text)) => match text.trim().to_lowercase().as_str() {
            "true" | "yes" | "1" | "on" => Some(Value::Bool(true)),
            "false" | "no" | "0" | "off" => Some(Value::Bool(false)),
            // Unrecognised text has no trustworthy boolean meaning.
            _ => None,
        },
        ("boolean", Value::Number(number)) => number.as_i64().map(|n| Value::Bool(n != 0)),
        _ => None,
    };

    converted.map_or(Cow::Borrowed(value), Cow::Owned)
}
fn strip_extra_keys(data: &mut Value, path: &[String], allowed: &[String]) {
let node = navigate_to_mut(data, path);
if let Some(obj) = node.and_then(|v| v.as_object_mut()) {
let keys_to_remove: Vec<String> = obj
.keys()
.filter(|k| !allowed.contains(k))
.cloned()
.collect();
for key in keys_to_remove {
obj.remove(&key);
}
}
}
fn navigate_to_mut<'a>(data: &'a mut Value, path: &[String]) -> Option<&'a mut Value> {
let mut current = data;
for key in path {
current = if let Ok(idx) = key.parse::<usize>() {
current.get_mut(idx)?
} else {
current.get_mut(key.as_str())?
};
}
Some(current)
}
fn value_matches_type(value: &Value, type_name: &str) -> bool {
match type_name {
"object" => value.is_object(),
"array" => value.is_array(),
"string" => value.is_string(),
"number" => value.is_number(),
"integer" => value.is_i64() || value.is_u64(),
"boolean" => value.is_boolean(),
"null" => value.is_null(),
_ => true,
}
}
fn json_type_name(value: &Value) -> &'static str {
match value {
Value::Null => "null",
Value::Bool(_) => "boolean",
Value::Number(_) => "number",
Value::String(_) => "string",
Value::Array(_) => "array",
Value::Object(_) => "object",
}
}
/// Formats a document path for display.
///
/// Segments are joined with `" > "`; the empty path is shown as `"root"`.
fn path_str(path: &[String]) -> String {
    match path {
        [] => String::from("root"),
        segments => segments.join(" > "),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A numeric string becomes the corresponding integer.
    #[test]
    fn test_coerce_string_to_integer() {
        let source = Value::String(String::from("42"));
        let coerced = coerce_type(&source, "integer").into_owned();
        assert_eq!(coerced, Value::Number(42.into()));
    }

    /// Common textual spellings map to the expected boolean.
    #[test]
    fn test_coerce_string_to_boolean() {
        let cases = [("true", true), ("yes", true), ("no", false)];
        for (text, expected) in cases {
            let source = Value::String(text.to_string());
            assert_eq!(
                coerce_type(&source, "boolean").into_owned(),
                Value::Bool(expected)
            );
        }
    }

    /// CRLF is converted and surrounding whitespace is removed.
    #[test]
    fn test_normalize_yaml() {
        let normalized = normalize_yaml(" foo: bar\r\n ");
        assert_eq!(normalized, "foo: bar");
    }

    /// Missing keys receive their default; present keys are left untouched.
    #[test]
    fn test_fill_defaults() {
        let schema = serde_json::json!({
            "properties": {
                "replicas": { "type": "integer", "default": 1 },
                "name": { "type": "string" }
            }
        });
        let mut document = serde_json::json!({ "name": "test" });
        let obj = document
            .as_object_mut()
            .expect("the literal above is an object");

        fill_defaults(obj, &schema);

        assert_eq!(obj.get("replicas"), Some(&Value::Number(1.into())));
        assert_eq!(obj.get("name"), Some(&Value::String("test".to_string())));
    }
}