use std::collections::BTreeMap;
use std::rc::Rc;
use serde_json::Value as JsonValue;
use crate::stdlib::{json_to_vm_value, schema_result_value};
use crate::value::{VmError, VmValue};
use super::helpers::extract_llm_options;
use super::{execute_schema_retry_loop, structured_output_errors};
// Values written to the envelope's "stage" key by `schema_recover_impl`; each names the
// recovery step that produced the returned envelope.
const STAGE_PARSED: &str = "parsed";
const STAGE_EXTRACTED: &str = "extracted";
const STAGE_REGEX: &str = "regex";
const STAGE_LLM_REPAIR: &str = "llm_repair";
const STAGE_FAILED: &str = "failed";
// Values written to the envelope's "error_category" key on failure.
const ERR_SCHEMA_VALIDATION: &str = "schema_validation";
const ERR_REPAIR_FAILED: &str = "repair_failed";
const ERR_TRANSPORT: &str = "transport";
/// Implements `schema_recover(text, schema, opts?)`: tries progressively more aggressive
/// strategies to turn `text` into a value that validates against `schema`.
///
/// Strategies run in this order, stopping at the first success:
/// 1. parse the whole text as JSON and validate it;
/// 2. parse whatever `extract_json_from_text` pulls out (skipped when it equals the input);
/// 3. scrape individual fields with regular expressions;
/// 4. ask the LLM to repair the text, unless disabled through `opts.llm_repair`.
///
/// The result is always an envelope dict (`ok`, `data`, `raw_text`, `error`,
/// `error_category`, `attempts`, `stage`, `repaired`); recovery failures are reported
/// inside the envelope rather than as `Err`.
///
/// # Errors
/// Returns `VmError::Runtime` when fewer than two arguments are supplied or when the
/// second argument is not a dict.
pub(crate) async fn schema_recover_impl(
    args: Vec<VmValue>,
    bridge: Option<&Rc<crate::bridge::HostBridge>>,
) -> Result<VmValue, VmError> {
    let [text_arg, schema_arg, ..] = args.as_slice() else {
        return Err(VmError::Runtime(
            "schema_recover: expected (text: string, schema: dict, opts?: dict)".to_string(),
        ));
    };
    let text = text_arg.display();
    if !matches!(schema_arg, VmValue::Dict(_)) {
        return Err(VmError::Runtime(format!(
            "schema_recover: schema must be a dict, got {}",
            schema_arg.type_name(),
        )));
    }
    let schema_value = schema_arg.clone();
    let opts = args.get(2).and_then(|extra| extra.as_dict()).cloned();
    let apply_defaults = opt_bool_field(&opts, "apply_defaults");

    // Stage 1: the text is already valid JSON.
    let mut attempts: usize = 1;
    let mut last_errors = match try_parse_and_validate(&text, &schema_value, apply_defaults) {
        Ok(data) => return Ok(envelope_success(data, &text, STAGE_PARSED, attempts, false)),
        Err(errs) => errs,
    };

    // Stage 2: JSON embedded in surrounding text; pointless if extraction changed nothing.
    let extracted = crate::stdlib::json::extract_json_from_text(&text);
    if extracted.trim() != text.trim() {
        attempts += 1;
        match try_parse_and_validate(&extracted, &schema_value, apply_defaults) {
            Ok(data) => {
                return Ok(envelope_success(data, &text, STAGE_EXTRACTED, attempts, false));
            }
            Err(errs) => last_errors = errs,
        }
    }

    // Stage 3: field-by-field regex scraping.
    attempts += 1;
    match try_regex_recover(&text, &schema_value, apply_defaults) {
        Ok(Some(data)) => return Ok(envelope_success(data, &text, STAGE_REGEX, attempts, false)),
        Ok(None) => {}
        Err(errs) => last_errors = errs,
    }

    // Stage 4: LLM repair. Once attempted, its outcome is final either way.
    let repair = parse_llm_repair_config(&opts);
    if repair.enabled {
        attempts += 1;
        let envelope = match run_llm_repair(&text, &schema_value, &repair, &opts, bridge).await {
            Ok(Some(data)) => envelope_success(data, &text, STAGE_LLM_REPAIR, attempts, true),
            Ok(None) => envelope_failure(
                &text,
                STAGE_LLM_REPAIR,
                ERR_REPAIR_FAILED,
                "LLM repair pass returned invalid JSON",
                attempts,
            ),
            Err(message) => envelope_failure(
                &text,
                STAGE_LLM_REPAIR,
                ERR_TRANSPORT,
                &message,
                attempts,
            ),
        };
        return Ok(envelope);
    }

    // Nothing worked: report the most recent validation errors, if there are any.
    let message = match last_errors.as_slice() {
        [] => "schema_recover: no recoverable JSON found".to_string(),
        errors => errors.join("; "),
    };
    Ok(envelope_failure(
        &text,
        STAGE_FAILED,
        ERR_SCHEMA_VALIDATION,
        &message,
        attempts,
    ))
}
fn try_parse_and_validate(
text: &str,
schema: &VmValue,
apply_defaults: bool,
) -> Result<VmValue, Vec<String>> {
let trimmed = text.trim();
if trimmed.is_empty() {
return Err(vec!["empty input".to_string()]);
}
let parsed = match serde_json::from_str::<JsonValue>(trimmed) {
Ok(v) => v,
Err(e) => return Err(vec![format!("JSON parse error: {e}")]),
};
let vm_value = json_to_vm_value(&parsed);
let result = schema_result_value(&vm_value, schema, apply_defaults);
extract_validation_outcome(&result)
}
/// Unwraps the `Result` enum variant produced by schema validation.
///
/// * `Result::Ok(v)` yields `Ok(v)` (or `Nil` when the variant carries no field).
/// * `Result::Err(payload)` yields the displayed entries of `payload.errors` when that is
///   a list, otherwise a single generic `"schema validation failed"` message.
/// * Any other variant name, or a value that is not a `Result` variant at all, yields a
///   one-element error list describing the problem.
fn extract_validation_outcome(result: &VmValue) -> Result<VmValue, Vec<String>> {
    let not_a_result = || Err(vec!["schema validation did not return a Result".to_string()]);
    let VmValue::EnumVariant {
        enum_name,
        variant,
        fields,
    } = result
    else {
        return not_a_result();
    };
    if enum_name.as_ref() != "Result" {
        return not_a_result();
    }
    match variant.as_ref() {
        "Ok" => Ok(fields.first().cloned().unwrap_or(VmValue::Nil)),
        "Err" => {
            let reported = fields
                .first()
                .and_then(|payload| payload.as_dict())
                .and_then(|payload| payload.get("errors"));
            let messages: Vec<String> = match reported {
                Some(VmValue::List(items)) => items.iter().map(|item| item.display()).collect(),
                _ => vec!["schema validation failed".to_string()],
            };
            Err(messages)
        }
        other => Err(vec![format!(
            "unexpected Result variant from schema validation: {other}"
        )]),
    }
}
/// Attempts to rebuild an object by scraping each schema property out of `text`.
///
/// Only applies to object-like schemas (a `type` of `"object"` or a `properties` key)
/// whose `properties` entry is a dict. Every property with a scrapeable scalar type is
/// looked up in the text; the collected fields are then validated against `schema`.
///
/// Returns `Ok(Some(value))` when the scraped object validates and `Ok(None)` in every
/// other case (wrong schema shape, nothing scraped, validation failed).
fn try_regex_recover(
    text: &str,
    schema: &VmValue,
    apply_defaults: bool,
) -> Result<Option<VmValue>, Vec<String>> {
    let Some(schema_dict) = schema.as_dict() else {
        return Ok(None);
    };
    let declares_object = matches!(
        schema_dict.get("type"),
        Some(VmValue::String(s)) if s.as_ref() == "object"
    );
    if !declares_object && !schema_dict.contains_key("properties") {
        return Ok(None);
    }
    let Some(VmValue::Dict(properties)) = schema_dict.get("properties") else {
        return Ok(None);
    };

    let recovered: BTreeMap<String, VmValue> = properties
        .iter()
        .filter_map(|(field, field_schema)| {
            let ty = field_type_name(field_schema)?;
            let value = scrape_field(text, field, ty, field_schema)?;
            Some((field.clone(), value))
        })
        .collect();
    if recovered.is_empty() {
        return Ok(None);
    }

    let candidate = VmValue::Dict(Rc::new(recovered));
    let verdict = schema_result_value(&candidate, schema, apply_defaults);
    // Validation errors are deliberately dropped: a failed scrape is simply "no result".
    Ok(extract_validation_outcome(&verdict).ok())
}
/// Returns the scrapeable scalar type declared by a property schema.
///
/// Reads the schema's `type` entry. A string yields itself when it is one of `"string"`,
/// `"integer"`, `"number"` or `"boolean"`; a list yields the first entry that is one of
/// those names. Anything else — including a non-dict schema or a missing `type` — yields
/// `None`.
fn field_type_name(field_schema: &VmValue) -> Option<&'static str> {
    /// Maps a single `type` entry to its canonical static name, if it is a supported scalar.
    fn scalar_name(entry: &VmValue) -> Option<&'static str> {
        let VmValue::String(name) = entry else {
            return None;
        };
        match name.as_ref() {
            "string" => Some("string"),
            "integer" => Some("integer"),
            "number" => Some("number"),
            "boolean" => Some("boolean"),
            _ => None,
        }
    }

    let declared = field_schema.as_dict()?.get("type")?;
    match declared {
        VmValue::String(_) => scalar_name(declared),
        VmValue::List(candidates) => candidates.iter().find_map(scalar_name),
        _ => None,
    }
}
/// Searches `text` for a `field: value` style assignment and coerces the value.
///
/// Four patterns are tried in order: quoted key with quoted value, quoted key with bare
/// value, bare key (`:` or `=`) with quoted value, bare key with bare value. For each
/// pattern only its first match in the text is considered; the first captured value that
/// `coerce_scalar` accepts is returned. Patterns that fail to compile are skipped.
fn scrape_field(
    text: &str,
    field: &str,
    field_type: &'static str,
    field_schema: &VmValue,
) -> Option<VmValue> {
    let key = regex::escape(field);
    let patterns = [
        format!(r#""{esc}"\s*:\s*"((?:[^"\\]|\\.)*)""#, esc = key),
        format!(r#""{esc}"\s*:\s*([^,\n\r}}]+)"#, esc = key),
        format!(r#"\b{esc}\s*[:=]\s*"((?:[^"\\]|\\.)*)""#, esc = key),
        format!(r#"\b{esc}\s*[:=]\s*([^,\n\r}}]+)"#, esc = key),
    ];
    patterns.iter().find_map(|pattern| {
        let re = regex::Regex::new(pattern).ok()?;
        let captured = re.captures(text)?.get(1)?;
        coerce_scalar(captured.as_str().trim(), field_type, field_schema)
    })
}
/// Converts a scraped token into a `VmValue` of the requested scalar type.
///
/// `raw` is first stripped of trailing `,`, `;`, `}` and `]` characters and of surrounding
/// whitespace. Returns `None` when nothing is left, when the token does not parse as
/// `field_type`, or when `field_type` is not a supported scalar name.
///
/// * `"string"` — surrounding quotes are removed and JSON escapes decoded. The literal
///   `null` (any ASCII case) is rejected unless `field_schema` allows null.
/// * `"integer"` — must parse as `i64`.
/// * `"number"` — an `i64` when the token parses as one, otherwise a finite `f64`.
/// * `"boolean"` — any token accepted by `parse_bool_token`.
fn coerce_scalar(raw: &str, field_type: &str, field_schema: &VmValue) -> Option<VmValue> {
    let cleaned = raw.trim_end_matches([',', ';', '}', ']']).trim();
    if cleaned.is_empty() {
        return None;
    }
    match field_type {
        "string" => {
            let unquoted = strip_surrounding_quotes(cleaned);
            let unescaped = unescape_json_string(&unquoted);
            if unescaped.eq_ignore_ascii_case("null") && !field_allows_null(field_schema) {
                return None;
            }
            Some(VmValue::String(Rc::from(unescaped.as_str())))
        }
        "integer" => cleaned.parse::<i64>().ok().map(VmValue::Int),
        "number" => match cleaned.parse::<i64>() {
            Ok(n) => Some(VmValue::Int(n)),
            // `f64::from_str` also accepts "inf", "infinity" and "nan"; none of those is a
            // JSON number, so scraped prose such as `score: NaN` must not become a value.
            Err(_) => cleaned
                .parse::<f64>()
                .ok()
                .filter(|n| n.is_finite())
                .map(VmValue::Float),
        },
        "boolean" => parse_bool_token(cleaned).map(VmValue::Bool),
        _ => None,
    }
}
/// Interprets a loose boolean token, ignoring ASCII case.
///
/// `true`, `yes`, `y`, `on` and `1` map to `Some(true)`; `false`, `no`, `n`, `off` and `0`
/// map to `Some(false)`; every other token yields `None`.
fn parse_bool_token(s: &str) -> Option<bool> {
    const TRUTHY: [&str; 5] = ["true", "yes", "y", "on", "1"];
    const FALSY: [&str; 5] = ["false", "no", "n", "off", "0"];

    let is_one_of = |tokens: &[&str]| tokens.iter().any(|token| s.eq_ignore_ascii_case(token));
    if is_one_of(&TRUTHY) {
        Some(true)
    } else if is_one_of(&FALSY) {
        Some(false)
    } else {
        None
    }
}
/// Removes one pair of matching quotes (`"…"` or `'…'`) wrapping `s`.
///
/// The input is returned unchanged when it is not wrapped in a matching pair — for
/// example when only one side is quoted, the quote kinds differ, or the string is a
/// single quote character.
fn strip_surrounding_quotes(s: &str) -> String {
    ['"', '\'']
        .iter()
        .find_map(|&quote| s.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(s)
        .to_string()
}
/// Decodes JSON string escape sequences in `s`.
///
/// Handles `\"`, `\\`, `\/`, `\n`, `\t`, `\r`, `\b`, `\f` and `\uXXXX`, including UTF-16
/// surrogate pairs written as two consecutive `\u` escapes. Input is treated leniently:
/// an unknown escape, a malformed or unpaired `\u` escape, or a trailing lone backslash
/// is copied to the output verbatim instead of being reported as an error.
fn unescape_json_string(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('"') => out.push('"'),
            Some('\\') => out.push('\\'),
            Some('/') => out.push('/'),
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('r') => out.push('\r'),
            Some('b') => out.push('\u{0008}'),
            Some('f') => out.push('\u{000C}'),
            Some('u') => {
                let rest = chars.as_str();
                match decode_unicode_escape(rest) {
                    Some((decoded, consumed)) => {
                        out.push(decoded);
                        // `consumed` only ever counts ASCII bytes, so this slice is on a
                        // character boundary.
                        chars = rest[consumed..].chars();
                    }
                    // Malformed escape: keep it as written and carry on after the `u`.
                    None => out.push_str("\\u"),
                }
            }
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            None => out.push('\\'),
        }
    }
    out
}

/// Decodes the payload following a `\u` marker.
///
/// `rest` is the text immediately after `\u`. Returns the decoded character and the
/// number of bytes of `rest` it occupied: 4 for a plain escape, 10 for a surrogate pair
/// (`XXXX\uYYYY`). Returns `None` for malformed hex or an unpaired surrogate.
fn decode_unicode_escape(rest: &str) -> Option<(char, usize)> {
    let first = parse_hex4(rest)?;
    if (0xD800..0xDC00).contains(&first) {
        // High surrogate: only meaningful when a low surrogate escape follows directly.
        let tail = rest.get(4..)?.strip_prefix("\\u")?;
        let second = parse_hex4(tail)?;
        if !(0xDC00..0xE000).contains(&second) {
            return None;
        }
        let code_point = 0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00);
        return char::from_u32(code_point).map(|decoded| (decoded, 10));
    }
    // `char::from_u32` rejects a lone low surrogate, which is the desired outcome.
    char::from_u32(first).map(|decoded| (decoded, 4))
}

/// Parses exactly four leading hexadecimal digits of `s`.
fn parse_hex4(s: &str) -> Option<u32> {
    let digits = s.get(..4)?;
    // `from_str_radix` tolerates a leading sign, so validate the digits explicitly.
    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u32::from_str_radix(digits, 16).ok()
}
/// Reports whether a property schema permits `null`.
///
/// True when the schema's `type` entry is the string `"null"` or a list containing the
/// string `"null"`; false otherwise, including when the schema is not a dict or has no
/// `type` entry.
fn field_allows_null(field_schema: &VmValue) -> bool {
    /// True when `entry` is the string `"null"`.
    fn is_null_name(entry: &VmValue) -> bool {
        matches!(entry, VmValue::String(s) if s.as_ref() == "null")
    }

    match field_schema.as_dict().and_then(|d| d.get("type")) {
        Some(VmValue::List(items)) => items.iter().any(is_null_name),
        Some(single) => is_null_name(single),
        None => false,
    }
}
/// Settings for the optional LLM repair stage, built by `parse_llm_repair_config`.
#[derive(Clone)]
struct LlmRepairConfig {
/// Whether the LLM repair stage should run.
enabled: bool,
/// Option entries that `merge_repair_options` applies last, on top of the caller's options.
overrides: BTreeMap<String, VmValue>,
}
/// Reads the `llm_repair` entry of the caller's options.
///
/// * absent options, absent key, or `nil` — enabled, no overrides;
/// * a bool — enabled as given, no overrides;
/// * a dict — enabled unless its `enabled` entry is exactly `false`; every other entry
///   becomes an override;
/// * any other value — disabled, no overrides.
fn parse_llm_repair_config(opts: &Option<BTreeMap<String, VmValue>>) -> LlmRepairConfig {
    let plain = |enabled: bool| LlmRepairConfig {
        enabled,
        overrides: BTreeMap::new(),
    };
    match opts.as_ref().and_then(|o| o.get("llm_repair")) {
        None | Some(VmValue::Nil) => plain(true),
        Some(VmValue::Bool(flag)) => plain(*flag),
        Some(VmValue::Dict(settings)) => {
            let enabled = !matches!(settings.get("enabled"), Some(VmValue::Bool(false)));
            let overrides = settings
                .iter()
                .filter(|(key, _)| key.as_str() != "enabled")
                .map(|(key, value)| (key.clone(), value.clone()))
                .collect();
            LlmRepairConfig { enabled, overrides }
        }
        Some(_) => plain(false),
    }
}
/// Returns true only when `opts` is present and maps `key` to the boolean `true`.
fn opt_bool_field(opts: &Option<BTreeMap<String, VmValue>>, key: &str) -> bool {
    let Some(map) = opts else {
        return false;
    };
    matches!(map.get(key), Some(VmValue::Bool(true)))
}
/// Asks the LLM to rewrite `text` so that it conforms to `schema`.
///
/// Builds the repair prompt, merges the caller's options with the repair overrides, and
/// runs the request through `execute_schema_retry_loop`.
///
/// Returns `Ok(Some(data))` with the `data` entry of the response (or `Nil` when it has
/// none), `Ok(None)` when the loop or `structured_output_errors` reports errors, and
/// `Err(message)` when option extraction or the request itself fails.
async fn run_llm_repair(
    text: &str,
    schema: &VmValue,
    repair: &LlmRepairConfig,
    base_opts: &Option<BTreeMap<String, VmValue>>,
    bridge: Option<&Rc<crate::bridge::HostBridge>>,
) -> Result<Option<VmValue>, String> {
    let prompt = build_repair_prompt(text, schema);
    let merged = merge_repair_options(base_opts.as_ref(), &repair.overrides, schema);

    // Same argument shape a regular LLM call would receive: (prompt, nil, options).
    let call_args = vec![
        VmValue::String(Rc::from(prompt.as_str())),
        VmValue::Nil,
        VmValue::Dict(Rc::new(merged.clone())),
    ];
    let llm_opts = extract_llm_options(&call_args).map_err(|e| e.to_string())?;
    let outcome = execute_schema_retry_loop(llm_opts.clone(), Some(merged), bridge)
        .await
        .map_err(|e| e.to_string())?;

    let rejected = !outcome.errors.is_empty()
        || !structured_output_errors(&outcome.vm_result, &llm_opts).is_empty();
    if rejected {
        return Ok(None);
    }

    let data = match outcome.vm_result.as_dict().and_then(|d| d.get("data")) {
        Some(value) => value.clone(),
        None => VmValue::Nil,
    };
    Ok(Some(data))
}
/// Builds the prompt sent to the LLM for the repair stage.
///
/// The prompt consists of fixed instructions, the schema rendered as compact JSON, the
/// original text verbatim, and a closing reminder to answer with JSON only.
fn build_repair_prompt(raw_text: &str, schema: &VmValue) -> String {
    const PREAMBLE: &str = "The following text was supposed to be JSON conforming to the schema below, but it failed validation. \
        Repair it and respond with ONLY the corrected JSON — no prose, no markdown fences, no commentary.\n\n";

    let schema_text = schema_to_compact_json(schema);
    [
        PREAMBLE,
        "Target schema:\n",
        schema_text.as_str(),
        "\n\nOriginal text:\n",
        raw_text,
        "\n\nReply with valid JSON only.",
    ]
    .concat()
}
/// Serializes `schema` as single-line JSON, falling back to `"{}"` if serialization fails.
fn schema_to_compact_json(schema: &VmValue) -> String {
    match serde_json::to_string(&super::helpers::vm_value_to_json(schema)) {
        Ok(rendered) => rendered,
        Err(_) => "{}".to_string(),
    }
}
fn merge_repair_options(
base: Option<&BTreeMap<String, VmValue>>,
overrides: &BTreeMap<String, VmValue>,
schema: &VmValue,
) -> BTreeMap<String, VmValue> {
let mut merged: BTreeMap<String, VmValue> = base.cloned().unwrap_or_default();
merged.insert("schema_retries".to_string(), VmValue::Int(0));
merged.remove("llm_repair");
merged.remove("apply_defaults");
merged.insert("output_schema".to_string(), schema.clone());
merged.insert("json_schema".to_string(), schema.clone());
merged
.entry("output_format".to_string())
.or_insert_with(|| {
let mut fmt = BTreeMap::new();
fmt.insert("kind".to_string(), VmValue::String(Rc::from("json_schema")));
fmt.insert("schema".to_string(), schema.clone());
fmt.insert("strict".to_string(), VmValue::Bool(true));
VmValue::Dict(Rc::new(fmt))
});
merged
.entry("output_validation".to_string())
.or_insert(VmValue::String(Rc::from("error")));
merged
.entry("response_format".to_string())
.or_insert(VmValue::String(Rc::from("json")));
for (k, v) in overrides {
merged.insert(k.clone(), v.clone());
}
merged
}
/// Builds the envelope dict returned when recovery succeeds.
///
/// `ok` is `true`, `error` is the empty string and `error_category` is `Nil`; `data`,
/// `raw_text`, `stage`, `attempts` and `repaired` carry the given arguments.
fn envelope_success(
    data: VmValue,
    raw_text: &str,
    stage: &str,
    attempts: usize,
    repaired: bool,
) -> VmValue {
    let text_value = |s: &str| VmValue::String(Rc::from(s));
    let envelope = BTreeMap::from([
        ("ok".to_string(), VmValue::Bool(true)),
        ("data".to_string(), data),
        ("raw_text".to_string(), text_value(raw_text)),
        ("error".to_string(), text_value("")),
        ("error_category".to_string(), VmValue::Nil),
        ("attempts".to_string(), VmValue::Int(attempts as i64)),
        ("stage".to_string(), text_value(stage)),
        ("repaired".to_string(), VmValue::Bool(repaired)),
    ]);
    VmValue::Dict(Rc::new(envelope))
}
/// Builds the envelope dict returned when recovery fails.
///
/// `ok` and `repaired` are `false` and `data` is `Nil`; `raw_text`, `stage`,
/// `error_category`, `error` and `attempts` carry the given arguments.
fn envelope_failure(
    raw_text: &str,
    stage: &str,
    error_category: &str,
    error_message: &str,
    attempts: usize,
) -> VmValue {
    let text_value = |s: &str| VmValue::String(Rc::from(s));
    let envelope = BTreeMap::from([
        ("ok".to_string(), VmValue::Bool(false)),
        ("data".to_string(), VmValue::Nil),
        ("raw_text".to_string(), text_value(raw_text)),
        ("error".to_string(), text_value(error_message)),
        ("error_category".to_string(), text_value(error_category)),
        ("attempts".to_string(), VmValue::Int(attempts as i64)),
        ("stage".to_string(), text_value(stage)),
        ("repaired".to_string(), VmValue::Bool(false)),
    ]);
    VmValue::Dict(Rc::new(envelope))
}
// Unit tests for the recovery helpers plus end-to-end checks of `schema_recover_impl`.
// Every end-to-end test passes `None` for the host bridge.
#[cfg(test)]
mod tests {
use super::*;
// Shared fixture: an object schema with `name` (string), `age` (integer) and
// `active` (boolean) properties, of which `name` and `age` are required.
fn person_schema() -> VmValue {
let mut name = BTreeMap::new();
name.insert("type".to_string(), VmValue::String(Rc::from("string")));
let mut age = BTreeMap::new();
age.insert("type".to_string(), VmValue::String(Rc::from("integer")));
let mut active = BTreeMap::new();
active.insert("type".to_string(), VmValue::String(Rc::from("boolean")));
let mut props = BTreeMap::new();
props.insert("name".to_string(), VmValue::Dict(Rc::new(name)));
props.insert("age".to_string(), VmValue::Dict(Rc::new(age)));
props.insert("active".to_string(), VmValue::Dict(Rc::new(active)));
let required = VmValue::List(Rc::new(vec![
VmValue::String(Rc::from("name")),
VmValue::String(Rc::from("age")),
]));
let mut schema = BTreeMap::new();
schema.insert("type".to_string(), VmValue::String(Rc::from("object")));
schema.insert("properties".to_string(), VmValue::Dict(Rc::new(props)));
schema.insert("required".to_string(), required);
VmValue::Dict(Rc::new(schema))
}
// Well-formed JSON that satisfies the schema is returned with its fields intact.
#[test]
fn parses_clean_json_directly() {
let schema = person_schema();
let result = try_parse_and_validate(
r#"{"name": "Ada", "age": 36, "active": true}"#,
&schema,
false,
)
.unwrap();
let dict = result.as_dict().unwrap();
assert_eq!(dict.get("name").unwrap().display(), "Ada");
assert_eq!(dict.get("age").unwrap().as_int(), Some(36));
assert!(matches!(dict.get("active"), Some(VmValue::Bool(true))));
}
// Valid JSON that violates the schema yields a non-empty error list.
#[test]
fn rejects_validation_failure() {
let schema = person_schema();
let err = try_parse_and_validate(r#"{"name": 42}"#, &schema, false).unwrap_err();
assert!(!err.is_empty());
}
// A `"key": "value"` fragment embedded in prose is scraped as a string.
#[test]
fn extracts_field_with_quoted_key_and_value() {
let schema = person_schema();
let scraped = scrape_field(
r#"the result is "name": "Ada Lovelace", others omitted"#,
"name",
"string",
schema
.as_dict()
.unwrap()
.get("properties")
.unwrap()
.as_dict()
.unwrap()
.get("name")
.unwrap(),
);
assert_eq!(scraped.unwrap().display(), "Ada Lovelace");
}
// An unquoted `key: value` line is scraped as an integer.
#[test]
fn extracts_field_with_yaml_shape() {
let schema = person_schema();
let scraped = scrape_field(
"name: Ada\nage: 36\nactive: yes\n",
"age",
"integer",
schema
.as_dict()
.unwrap()
.get("properties")
.unwrap()
.as_dict()
.unwrap()
.get("age")
.unwrap(),
);
assert_eq!(scraped.unwrap().as_int(), Some(36));
}
// The loose token `yes` is accepted as boolean true.
#[test]
fn extracts_boolean_via_yes_token() {
let schema = person_schema();
let scraped = scrape_field(
"name: Ada\nage: 36\nactive: yes\n",
"active",
"boolean",
schema
.as_dict()
.unwrap()
.get("properties")
.unwrap()
.as_dict()
.unwrap()
.get("active")
.unwrap(),
);
assert!(matches!(scraped, Some(VmValue::Bool(true))));
}
// All three properties are scraped from a single prose line and pass validation.
#[test]
fn regex_recover_assembles_partial_dict() {
let schema = person_schema();
let raw = "Here is the answer: name: \"Grace Hopper\", age: 85, active: false";
let result = try_regex_recover(raw, &schema, false).unwrap().unwrap();
let dict = result.as_dict().unwrap();
assert_eq!(dict.get("name").unwrap().display(), "Grace Hopper");
assert_eq!(dict.get("age").unwrap().as_int(), Some(85));
assert!(matches!(dict.get("active"), Some(VmValue::Bool(false))));
}
// Scraping only an optional field fails validation, which surfaces as `None`.
#[test]
fn regex_recover_returns_none_when_required_field_missing() {
let schema = person_schema();
let raw = "active: true";
let outcome = try_regex_recover(raw, &schema, false).unwrap();
assert!(outcome.is_none());
}
// A scalar schema has no properties to scrape, so the regex stage is skipped.
#[test]
fn regex_recover_skips_when_schema_is_not_object() {
let mut scalar = BTreeMap::new();
scalar.insert("type".to_string(), VmValue::String(Rc::from("string")));
let schema = VmValue::Dict(Rc::new(scalar));
let outcome = try_regex_recover("hello", &schema, false).unwrap();
assert!(outcome.is_none());
}
// Trailing punctuation left over from scraping is removed from string values.
#[test]
fn coerce_handles_unquoted_string_with_trailing_punct() {
let mut field = BTreeMap::new();
field.insert("type".to_string(), VmValue::String(Rc::from("string")));
let v = coerce_scalar("Ada,", "string", &VmValue::Dict(Rc::new(field))).unwrap();
assert_eq!(v.display(), "Ada");
}
// Backslash-escaped quotes inside a scraped string are decoded.
#[test]
fn coerce_handles_escaped_quotes_in_string() {
let mut field = BTreeMap::new();
field.insert("type".to_string(), VmValue::String(Rc::from("string")));
let v = coerce_scalar(
"he said \\\"hi\\\"",
"string",
&VmValue::Dict(Rc::new(field)),
)
.unwrap();
assert_eq!(v.display(), "he said \"hi\"");
}
// The literal `null` is not accepted as a string when the schema does not allow null.
#[test]
fn coerce_rejects_null_for_non_nullable_string() {
let mut field = BTreeMap::new();
field.insert("type".to_string(), VmValue::String(Rc::from("string")));
let v = coerce_scalar("null", "string", &VmValue::Dict(Rc::new(field)));
assert!(v.is_none());
}
// `llm_repair: false` switches the repair stage off.
#[test]
fn parse_repair_config_disable_via_bool() {
let mut opts = BTreeMap::new();
opts.insert("llm_repair".to_string(), VmValue::Bool(false));
let cfg = parse_llm_repair_config(&Some(opts));
assert!(!cfg.enabled);
}
// With no options at all, the repair stage defaults to enabled.
#[test]
fn parse_repair_config_enabled_when_unspecified() {
let cfg = parse_llm_repair_config(&None);
assert!(cfg.enabled);
}
// A dict-valued `llm_repair` stays enabled and its entries become overrides.
#[test]
fn parse_repair_config_dict_extracts_overrides() {
let mut repair = BTreeMap::new();
repair.insert("model".to_string(), VmValue::String(Rc::from("local:fix")));
repair.insert("max_tokens".to_string(), VmValue::Int(400));
let mut opts = BTreeMap::new();
opts.insert("llm_repair".to_string(), VmValue::Dict(Rc::new(repair)));
let cfg = parse_llm_repair_config(&Some(opts));
assert!(cfg.enabled);
assert_eq!(
cfg.overrides.get("model").map(VmValue::display).as_deref(),
Some("local:fix")
);
assert_eq!(
cfg.overrides.get("max_tokens").and_then(VmValue::as_int),
Some(400)
);
}
// Merging forces `schema_retries` to 0, installs the schema, and drops the
// recovery-only keys from the caller's options.
#[test]
fn merge_repair_caps_schema_retries_and_installs_schema() {
let schema = person_schema();
let mut base = BTreeMap::new();
base.insert("schema_retries".to_string(), VmValue::Int(7));
base.insert("llm_repair".to_string(), VmValue::Bool(true));
base.insert("apply_defaults".to_string(), VmValue::Bool(true));
let merged = merge_repair_options(Some(&base), &BTreeMap::new(), &schema);
assert_eq!(
merged.get("schema_retries").and_then(VmValue::as_int),
Some(0)
);
assert!(merged.contains_key("output_schema"));
assert!(!merged.contains_key("llm_repair"));
assert!(!merged.contains_key("apply_defaults"));
assert_eq!(
merged
.get("output_validation")
.map(VmValue::display)
.as_deref(),
Some("error")
);
}
// An override replaces the caller's value for the same key.
#[test]
fn merge_repair_overrides_win_over_base() {
let schema = person_schema();
let mut base = BTreeMap::new();
base.insert("model".to_string(), VmValue::String(Rc::from("base:big")));
let mut overrides = BTreeMap::new();
overrides.insert(
"model".to_string(),
VmValue::String(Rc::from("override:small")),
);
let merged = merge_repair_options(Some(&base), &overrides, &schema);
assert_eq!(
merged.get("model").map(VmValue::display).as_deref(),
Some("override:small")
);
}
// The repair prompt contains its section headings and the original text verbatim.
#[test]
fn build_repair_prompt_includes_schema_and_text() {
let schema = person_schema();
let prompt = build_repair_prompt(r#"{"name": 42}"#, &schema);
assert!(prompt.contains("Target schema"));
assert!(prompt.contains("Original text"));
assert!(prompt.contains(r#"{"name": 42}"#));
assert!(prompt.contains("Reply with valid JSON only"));
}
// End to end: clean JSON succeeds at the first stage with a single attempt.
#[tokio::test]
async fn schema_recover_stage_parsed_for_clean_json() {
let schema = person_schema();
let args = vec![
VmValue::String(Rc::from(r#"{"name": "Ada", "age": 36}"#)),
schema,
];
let env = schema_recover_impl(args, None).await.unwrap();
let dict = env.as_dict().unwrap();
assert!(matches!(dict.get("ok"), Some(VmValue::Bool(true))));
assert_eq!(
dict.get("stage").map(VmValue::display).as_deref(),
Some("parsed"),
);
assert_eq!(dict.get("attempts").and_then(VmValue::as_int), Some(1));
assert!(matches!(dict.get("repaired"), Some(VmValue::Bool(false))));
}
// End to end: JSON inside a markdown fence succeeds at the extraction stage.
#[tokio::test]
async fn schema_recover_stage_extracted_for_fenced_json() {
let schema = person_schema();
let args = vec![
VmValue::String(Rc::from(
"Sure, here you go:\n```json\n{\"name\": \"Ada\", \"age\": 36}\n```\nDone.",
)),
schema,
];
let env = schema_recover_impl(args, None).await.unwrap();
let dict = env.as_dict().unwrap();
assert!(matches!(dict.get("ok"), Some(VmValue::Bool(true))));
assert_eq!(
dict.get("stage").map(VmValue::display).as_deref(),
Some("extracted"),
);
}
// End to end: `key: value` lines that are not JSON succeed at the regex stage.
#[tokio::test]
async fn schema_recover_stage_regex_for_yaml_shape() {
let schema = person_schema();
let args = vec![
VmValue::String(Rc::from("name: Ada\nage: 36\nactive: true\n")),
schema,
];
let env = schema_recover_impl(args, None).await.unwrap();
let dict = env.as_dict().unwrap();
assert!(
matches!(dict.get("ok"), Some(VmValue::Bool(true))),
"envelope: {:?}",
env
);
assert_eq!(
dict.get("stage").map(VmValue::display).as_deref(),
Some("regex"),
);
let data = dict.get("data").unwrap().as_dict().unwrap();
assert_eq!(data.get("name").unwrap().display(), "Ada");
assert_eq!(data.get("age").unwrap().as_int(), Some(36));
}
// End to end: with LLM repair disabled, unrecoverable text produces a failure
// envelope in the `schema_validation` category.
#[tokio::test]
async fn schema_recover_failure_when_repair_disabled_and_unrecoverable() {
let schema = person_schema();
let mut opts = BTreeMap::new();
opts.insert("llm_repair".to_string(), VmValue::Bool(false));
let args = vec![
VmValue::String(Rc::from("nothing useful here at all")),
schema,
VmValue::Dict(Rc::new(opts)),
];
let env = schema_recover_impl(args, None).await.unwrap();
let dict = env.as_dict().unwrap();
assert!(matches!(dict.get("ok"), Some(VmValue::Bool(false))));
assert_eq!(
dict.get("stage").map(VmValue::display).as_deref(),
Some("failed"),
);
assert_eq!(
dict.get("error_category").map(VmValue::display).as_deref(),
Some("schema_validation"),
);
}
// A non-dict schema argument is a hard error, not a failure envelope.
#[tokio::test]
async fn schema_recover_rejects_non_dict_schema() {
let args = vec![
VmValue::String(Rc::from("anything")),
VmValue::String(Rc::from("not a schema")),
];
let err = schema_recover_impl(args, None).await.unwrap_err();
let msg = err.to_string();
assert!(msg.contains("schema must be a dict"), "got: {msg}");
}
// Calling with only the text argument is a hard error.
#[tokio::test]
async fn schema_recover_rejects_missing_schema_arg() {
let args = vec![VmValue::String(Rc::from("anything"))];
let err = schema_recover_impl(args, None).await.unwrap_err();
let msg = err.to_string();
assert!(msg.contains("expected"), "got: {msg}");
}
}