1use std::borrow::Cow;
29
30use devops_models::models::validation::RepairResult;
31use serde_json::Value;
32
33pub fn repair_yaml(yaml_content: &str, schema: &Value) -> RepairResult {
74 let normalized = normalize_yaml(yaml_content);
76
77 let mut data: Value = match serde_yaml::from_str(&normalized) {
79 Ok(v) => v,
80 Err(e) => {
81 return RepairResult {
82 valid: false,
83 repaired_yaml: yaml_content.to_string(),
84 errors: vec![format!("YAML parse error: {e}")],
85 warnings: vec![],
86 llm_fields: vec![],
87 summary: format!("Cannot parse YAML: {e}"),
88 }
89 }
90 };
91
92 if let Some(obj) = data.as_object_mut() {
94 fill_defaults(obj, schema);
95 }
96
97 let (deterministic, ambiguous) = categorize_schema_errors(&data, schema);
99
100 let mut fix_log = Vec::new();
102 let mut failed_fixes = Vec::new();
103
104 for error in &deterministic {
105 match error.kind {
106 SchemaErrorKind::Type => {
107 if let Some(target_type) = &error.expected_type {
108 if apply_type_coercion(&mut data, &error.path, target_type) {
109 fix_log.push(format!(
110 "Coerced {} to type '{}'",
111 path_str(&error.path),
112 target_type
113 ));
114 } else {
115 failed_fixes.push(path_str(&error.path));
116 }
117 }
118 }
119 SchemaErrorKind::AdditionalProperties => {
120 if let Some(allowed) = &error.allowed_keys {
121 strip_extra_keys(&mut data, &error.path, allowed);
122 fix_log.push(format!(
123 "Removed extra keys at {}",
124 path_str(&error.path)
125 ));
126 }
127 }
128 SchemaErrorKind::Enum => {
129 failed_fixes.push(path_str(&error.path));
130 }
131 SchemaErrorKind::Required => {}
132 }
133 }
134
135 let repaired_yaml = match serde_yaml::to_string(&data) {
137 Ok(s) => s,
138 Err(e) => {
139 return RepairResult {
140 valid: false,
141 repaired_yaml: yaml_content.to_string(),
142 errors: vec![format!("Failed to serialize repaired YAML: {e}")],
143 warnings: fix_log,
144 llm_fields: vec![],
145 summary: "Repair failed during serialization".to_string(),
146 }
147 }
148 };
149
150 let mut remaining_errors: Vec<String> = ambiguous.iter().map(|e| e.message.clone()).collect();
152 remaining_errors.extend(failed_fixes.iter().map(|p| format!("Could not auto-fix: {p}")));
153
154 let llm_fields: Vec<String> = ambiguous
155 .iter()
156 .map(|e| path_str(&e.path))
157 .chain(failed_fixes)
158 .collect();
159
160 let valid = remaining_errors.is_empty();
161 let summary = if valid {
162 format!(
163 "YAML repaired successfully ({} fix{})",
164 fix_log.len(),
165 if fix_log.len() == 1 { "" } else { "es" }
166 )
167 } else {
168 format!(
169 "{} fix(es) applied, {} issue(s) remaining (need LLM assistance)",
170 fix_log.len(),
171 remaining_errors.len()
172 )
173 };
174
175 RepairResult {
176 valid,
177 repaired_yaml,
178 errors: remaining_errors,
179 warnings: fix_log,
180 llm_fields,
181 summary,
182 }
183}
184
/// Returns `content` with every `\r\n` and lone `\r` turned into `\n`, and with leading and
/// trailing whitespace removed.
fn normalize_yaml(content: &str) -> String {
    // Windows line endings first, so the second pass only sees stray carriage returns.
    let without_crlf = content.replace("\r\n", "\n");
    let unix_newlines = without_crlf.replace('\r', "\n");
    unix_newlines.trim().to_owned()
}
194
195fn fill_defaults(obj: &mut serde_json::Map<String, Value>, schema: &Value) {
198 let properties = match schema.get("properties").and_then(|p| p.as_object()) {
199 Some(p) => p,
200 None => return,
201 };
202
203 for (prop, subschema) in properties {
204 if !obj.contains_key(prop) {
205 if let Some(default_val) = subschema.get("default") {
206 obj.insert(prop.clone(), default_val.clone());
207 }
208 } else if let Some(nested_obj) = obj.get_mut(prop).and_then(|v| v.as_object_mut())
209 && subschema.get("properties").is_some()
210 {
211 fill_defaults(nested_obj, subschema);
212 }
213 }
214}
215
/// The category of a schema violation, named after the schema keyword that was violated.
#[derive(Debug, Clone, PartialEq)]
enum SchemaErrorKind {
    /// The value's JSON type differs from the schema's `type`.
    Type,
    /// The value is not one of the schema's `enum` members.
    Enum,
    /// A key listed under `required` is missing.
    Required,
    /// The object has keys that `properties` does not declare while
    /// `additionalProperties` is `false`.
    AdditionalProperties,
}

impl std::fmt::Display for SchemaErrorKind {
    /// Writes the schema keyword for this kind, e.g. `additionalProperties`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let keyword = match self {
            Self::Type => "type",
            Self::Enum => "enum",
            Self::Required => "required",
            Self::AdditionalProperties => "additionalProperties",
        };
        f.write_str(keyword)
    }
}
237
238#[derive(Debug, Clone)]
240struct SchemaError {
241 path: Vec<String>,
242 kind: SchemaErrorKind,
243 message: String,
244 expected_type: Option<String>,
245 allowed_keys: Option<Vec<String>>,
246}
247
248impl std::fmt::Display for SchemaError {
249 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
250 write!(f, "{}", self.message)
251 }
252}
253
254impl std::error::Error for SchemaError {}
255
256fn categorize_schema_errors(data: &Value, schema: &Value) -> (Vec<SchemaError>, Vec<SchemaError>) {
257 let mut deterministic = Vec::new();
258 let mut ambiguous = Vec::new();
259
260 validate_against_schema(data, schema, &mut vec![], &mut deterministic, &mut ambiguous);
261
262 (deterministic, ambiguous)
263}
264
265fn validate_against_schema(
266 data: &Value,
267 schema: &Value,
268 path: &mut Vec<String>,
269 deterministic: &mut Vec<SchemaError>,
270 ambiguous: &mut Vec<SchemaError>,
271) {
272 if let Some(expected_type) = schema.get("type").and_then(|t| t.as_str())
274 && !value_matches_type(data, expected_type)
275 {
276 deterministic.push(SchemaError {
277 path: path.clone(),
278 kind: SchemaErrorKind::Type,
279 message: format!(
280 "{}: expected type '{}', got '{}'",
281 path_str(path),
282 expected_type,
283 json_type_name(data)
284 ),
285 expected_type: Some(expected_type.to_string()),
286 allowed_keys: None,
287 });
288 return;
289 }
290
291 if let Some(enum_values) = schema.get("enum").and_then(|e| e.as_array())
293 && !enum_values.contains(data)
294 {
295 deterministic.push(SchemaError {
296 path: path.clone(),
297 kind: SchemaErrorKind::Enum,
298 message: format!("{}: value not in enum {:?}", path_str(path), enum_values),
299 expected_type: None,
300 allowed_keys: None,
301 });
302 }
303
304 if let Some(required) = schema.get("required").and_then(|r| r.as_array())
306 && let Some(obj) = data.as_object()
307 {
308 for req in required {
309 if let Some(key) = req.as_str()
310 && !obj.contains_key(key)
311 {
312 ambiguous.push(SchemaError {
313 path: path.clone(),
314 kind: SchemaErrorKind::Required,
315 message: format!("{}: missing required field '{}'", path_str(path), key),
316 expected_type: None,
317 allowed_keys: None,
318 });
319 }
320 }
321 }
322
323 if let Some(additional) = schema.get("additionalProperties")
325 && additional == &Value::Bool(false)
326 && let (Some(obj), Some(props)) = (
327 data.as_object(),
328 schema.get("properties").and_then(|p| p.as_object()),
329 )
330 {
331 let allowed: Vec<String> = props.keys().cloned().collect();
332 let extra: Vec<&String> = obj.keys().filter(|k| !allowed.contains(k)).collect();
333 if !extra.is_empty() {
334 deterministic.push(SchemaError {
335 path: path.clone(),
336 kind: SchemaErrorKind::AdditionalProperties,
337 message: format!(
338 "{}: unknown fields: {}",
339 path_str(path),
340 extra.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ")
341 ),
342 expected_type: None,
343 allowed_keys: Some(allowed),
344 });
345 }
346 }
347
348 if let Some(properties) = schema.get("properties").and_then(|p| p.as_object())
350 && let Some(obj) = data.as_object()
351 {
352 for (key, subschema) in properties {
353 if let Some(value) = obj.get(key) {
354 path.push(key.clone());
355 validate_against_schema(value, subschema, path, deterministic, ambiguous);
356 path.pop();
357 }
358 }
359 }
360
361 if let Some(items_schema) = schema.get("items")
363 && let Some(arr) = data.as_array()
364 {
365 for (i, item) in arr.iter().enumerate() {
366 path.push(i.to_string());
367 validate_against_schema(item, items_schema, path, deterministic, ambiguous);
368 path.pop();
369 }
370 }
371}
372
373fn apply_type_coercion(data: &mut Value, path: &[String], target_type: &str) -> bool {
376 let value = navigate_to_mut(data, path);
377 let value = match value {
378 Some(v) => v,
379 None => return false,
380 };
381
382 let coerced = coerce_type(value, target_type);
383 match coerced {
384 Cow::Owned(new_value) => {
385 *value = new_value;
386 true
387 }
388 Cow::Borrowed(_) => false,
389 }
390}
391
392fn coerce_type<'a>(value: &'a Value, target_type: &str) -> Cow<'a, Value> {
393 match target_type {
394 "integer" => {
395 if let Some(s) = value.as_str()
396 && let Ok(n) = s.parse::<i64>()
397 {
398 return Cow::Owned(Value::Number(n.into()));
399 }
400 if let Some(f) = value.as_f64() {
401 return Cow::Owned(Value::Number((f as i64).into()));
402 }
403 Cow::Borrowed(value)
404 }
405 "number" => {
406 if let Some(s) = value.as_str()
407 && let Ok(f) = s.parse::<f64>()
408 {
409 return match serde_json::Number::from_f64(f) {
410 Some(n) => Cow::Owned(Value::Number(n)),
411 None => Cow::Borrowed(value),
412 };
413 }
414 Cow::Borrowed(value)
415 }
416 "string" => match value {
417 Value::Number(n) => Cow::Owned(Value::String(n.to_string())),
418 Value::Bool(b) => Cow::Owned(Value::String(b.to_string())),
419 _ => Cow::Borrowed(value),
420 },
421 "boolean" => {
422 if let Some(s) = value.as_str() {
423 let lower = s.to_lowercase();
424 return Cow::Owned(Value::Bool(matches!(
425 lower.as_str(),
426 "true" | "yes" | "1" | "on"
427 )));
428 }
429 if let Some(n) = value.as_i64() {
430 return Cow::Owned(Value::Bool(n != 0));
431 }
432 Cow::Borrowed(value)
433 }
434 _ => Cow::Borrowed(value),
435 }
436}
437
438fn strip_extra_keys(data: &mut Value, path: &[String], allowed: &[String]) {
439 let node = navigate_to_mut(data, path);
440 if let Some(obj) = node.and_then(|v| v.as_object_mut()) {
441 let keys_to_remove: Vec<String> = obj
442 .keys()
443 .filter(|k| !allowed.contains(k))
444 .cloned()
445 .collect();
446 for key in keys_to_remove {
447 obj.remove(&key);
448 }
449 }
450}
451
452fn navigate_to_mut<'a>(data: &'a mut Value, path: &[String]) -> Option<&'a mut Value> {
455 let mut current = data;
456 for key in path {
457 current = if let Ok(idx) = key.parse::<usize>() {
458 current.get_mut(idx)?
459 } else {
460 current.get_mut(key.as_str())?
461 };
462 }
463 Some(current)
464}
465
466fn value_matches_type(value: &Value, type_name: &str) -> bool {
467 match type_name {
468 "object" => value.is_object(),
469 "array" => value.is_array(),
470 "string" => value.is_string(),
471 "number" => value.is_number(),
472 "integer" => value.is_i64() || value.is_u64(),
473 "boolean" => value.is_boolean(),
474 "null" => value.is_null(),
475 _ => true,
476 }
477}
478
479fn json_type_name(value: &Value) -> &'static str {
480 match value {
481 Value::Null => "null",
482 Value::Bool(_) => "boolean",
483 Value::Number(_) => "number",
484 Value::String(_) => "string",
485 Value::Array(_) => "array",
486 Value::Object(_) => "object",
487 }
488}
489
/// Formats a document path for messages: segments joined by `" > "`, or `"root"` when the
/// path is empty.
fn path_str(path: &[String]) -> String {
    match path {
        [] => String::from("root"),
        segments => segments.join(" > "),
    }
}
497
#[cfg(test)]
mod tests {
    use super::*;

    /// A numeric string becomes the corresponding JSON integer.
    #[test]
    fn test_coerce_string_to_integer() {
        let raw = Value::String("42".to_string());
        let expected: Cow<'_, Value> = Cow::Owned(Value::Number(42.into()));
        assert_eq!(coerce_type(&raw, "integer"), expected);
    }

    /// Common truthy and falsy spellings map to the matching boolean.
    #[test]
    fn test_coerce_string_to_boolean() {
        let cases = [("true", true), ("yes", true), ("no", false)];
        for (input, expected) in cases {
            let raw = Value::String(input.to_string());
            assert_eq!(
                coerce_type(&raw, "boolean"),
                Cow::Owned::<Value>(Value::Bool(expected)),
                "unexpected result for input {input:?}"
            );
        }
    }

    /// Carriage returns are normalized and surrounding whitespace is removed.
    #[test]
    fn test_normalize_yaml() {
        let normalized = normalize_yaml(" foo: bar\r\n ");
        assert_eq!(normalized, "foo: bar");
    }

    /// Missing properties receive their schema default; present ones are left alone.
    #[test]
    fn test_fill_defaults() {
        let schema = serde_json::json!({
            "properties": {
                "replicas": { "type": "integer", "default": 1 },
                "name": { "type": "string" }
            }
        });
        let mut document = serde_json::json!({ "name": "test" });
        let obj = document
            .as_object_mut()
            .expect("fixture literal is an object");

        fill_defaults(obj, &schema);

        assert_eq!(obj.get("replicas"), Some(&Value::Number(1.into())));
        assert_eq!(obj.get("name"), Some(&Value::String("test".to_string())));
    }
}