use std::path::{Path, PathBuf};
use serde_json::Value;
use crate::commands::gbnf_classifier as clf;
use crate::error::{CliError, Result};
/// Runs the `apr gbnf-lint` check against a JSON observation file.
///
/// The file is parsed as JSON, each of the three classifiers (JSON output,
/// grammar-error diagnostic, illegal-token masking) is applied to the parts of
/// the observation it understands, and a report is printed to stdout — as a
/// JSON object when `json` is true, as plain text otherwise. The report is
/// printed before any validation error is returned.
///
/// # Errors
///
/// * `CliError::FileNotFound` when the file does not exist.
/// * Any other I/O error from reading the file, converted into the crate error.
/// * `CliError::InvalidFormat` when the file content is not valid JSON.
/// * `CliError::ValidationFailed` when at least one classifier reports a
///   failure; the individual reasons are joined with `"; "`.
pub(crate) fn run(observation_file: &Path, json: bool) -> Result<()> {
    // Read in a single step rather than `exists()` followed by a read: the
    // pre-check is racy, and `Path::exists` also returns false for metadata
    // errors such as permission problems, which would be misreported as
    // "file not found".
    let body = match std::fs::read_to_string(observation_file) {
        Ok(body) => body,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            return Err(CliError::FileNotFound(PathBuf::from(observation_file)));
        }
        Err(e) => return Err(e.into()),
    };

    let obs: Value = serde_json::from_str(&body).map_err(|e| {
        CliError::InvalidFormat(format!(
            "apr gbnf-lint: failed to parse JSON from {}: {e}",
            observation_file.display()
        ))
    })?;

    // Each classifier yields `None` when the observation lacks its fields.
    let json_out = classify_json(&obs);
    let err_diag = classify_error_diagnostic(&obs);
    let masking = classify_masking(&obs);

    // Collect a failure reason from every classifier that ran and did not pass.
    let fail_reasons: Vec<String> = [
        json_out.as_ref().and_then(json_fail_reason),
        err_diag.as_ref().and_then(error_fail_reason),
        masking.as_ref().and_then(masking_fail_reason),
    ]
    .into_iter()
    .flatten()
    .collect();

    print_report(
        observation_file,
        json_out.as_ref(),
        err_diag.as_ref(),
        masking.as_ref(),
        json,
    );

    if fail_reasons.is_empty() {
        Ok(())
    } else {
        Err(CliError::ValidationFailed(fail_reasons.join("; ")))
    }
}
fn classify_json(obs: &Value) -> Option<clf::JsonGrammarOutputOutcome> {
let output = obs.get("output")?.as_str()?;
let finish = obs.get("finish_reason")?.as_str()?;
Some(clf::classify_json_grammar_output(output, finish))
}
/// Classifies the `grammar_error` object of an observation.
///
/// Reads `grammar_error.exit_code` (integer) and `grammar_error.stderr`
/// (string) and hands them to the grammar-error classifier.
///
/// Returns `None` when `grammar_error` is absent or not an object, when either
/// field is missing or has the wrong JSON type, or when `exit_code` does not
/// fit in an `i32`.
fn classify_error_diagnostic(obs: &Value) -> Option<clf::GrammarErrorDiagnosticOutcome> {
    let grammar_error = obs.get("grammar_error")?.as_object()?;
    // Checked conversion: a plain `as i32` cast would silently wrap, e.g.
    // 4294967296 becomes 0 and a non-zero exit code would be classified as a
    // zero exit. Out-of-range values are treated like any other malformed field.
    let exit_code = i32::try_from(grammar_error.get("exit_code")?.as_i64()?).ok()?;
    let stderr = grammar_error.get("stderr")?.as_str()?;
    Some(clf::classify_grammar_error_diagnostic(exit_code, stderr))
}
/// Classifies the `masking` object of an observation.
///
/// Expects `masking.logits` and `masking.legal_mask` to be JSON arrays.
/// Returns `None` when `masking` or either array is missing or has the wrong
/// type, or when any `legal_mask` entry is not a boolean.
///
/// Logit entries are converted leniently: `null` becomes negative infinity,
/// numbers are narrowed to `f32`, and anything else becomes NaN.
fn classify_masking(obs: &Value) -> Option<clf::IllegalTokenMaskingOutcome> {
    let section = obs.get("masking").and_then(Value::as_object)?;
    let raw_logits = section.get("logits").and_then(Value::as_array)?;
    let raw_mask = section.get("legal_mask").and_then(Value::as_array)?;

    // A single non-boolean entry invalidates the whole mask.
    let legal = raw_mask
        .iter()
        .map(Value::as_bool)
        .collect::<Option<Vec<bool>>>()?;

    let mut logits: Vec<f32> = Vec::with_capacity(raw_logits.len());
    for entry in raw_logits {
        let logit = match entry {
            Value::Null => f32::NEG_INFINITY,
            Value::Number(num) => num.as_f64().map_or(f32::NAN, |wide| wide as f32),
            _ => f32::NAN,
        };
        logits.push(logit);
    }

    Some(clf::classify_illegal_token_masking(&logits, &legal))
}
fn json_fail_reason(o: &clf::JsonGrammarOutputOutcome) -> Option<String> {
match o {
clf::JsonGrammarOutputOutcome::Ok => None,
clf::JsonGrammarOutputOutcome::EmptyOutput => {
Some("FALSIFY-CRUX-C-10-001 json: empty output string".to_string())
}
clf::JsonGrammarOutputOutcome::NotJson { error } => Some(format!(
"FALSIFY-CRUX-C-10-001 json: output does not parse as JSON: {error}"
)),
clf::JsonGrammarOutputOutcome::WrongFinishReason { got } => Some(format!(
"FALSIFY-CRUX-C-10-001 json: finish_reason={got:?} not in {{stop, length}}"
)),
}
}
/// Turns a grammar-error diagnostic outcome into a failure message.
///
/// Returns `None` for the `Ok` outcome and a `FALSIFY-CRUX-C-10-002`-tagged
/// message for every other variant.
fn error_fail_reason(o: &clf::GrammarErrorDiagnosticOutcome) -> Option<String> {
    let reason = match o {
        clf::GrammarErrorDiagnosticOutcome::Ok => return None,
        clf::GrammarErrorDiagnosticOutcome::ZeroExitCode => String::from(
            "FALSIFY-CRUX-C-10-002 diagnostic: malformed grammar silently accepted (exit 0)",
        ),
        clf::GrammarErrorDiagnosticOutcome::MissingGrammarDiagnostic { stderr_snippet } => {
            format!(
                "FALSIFY-CRUX-C-10-002 diagnostic: stderr missing 'grammar' keyword; snippet={stderr_snippet:?}"
            )
        }
    };
    Some(reason)
}
/// Turns an illegal-token-masking outcome into a failure message.
///
/// Returns `None` for the `Ok` outcome and a tagged message for every other
/// variant.
///
/// NOTE(review): these messages carry the same `FALSIFY-CRUX-C-10-001` id as
/// the JSON-output messages, while the diagnostic messages use `-002`. Confirm
/// against the contract whether masking is meant to share `-001` or should
/// have its own id.
fn masking_fail_reason(o: &clf::IllegalTokenMaskingOutcome) -> Option<String> {
    let reason = match o {
        clf::IllegalTokenMaskingOutcome::Ok => return None,
        clf::IllegalTokenMaskingOutcome::LengthMismatch {
            logits_len,
            mask_len,
        } => format!(
            "FALSIFY-CRUX-C-10-001 masking: length mismatch logits={logits_len} mask={mask_len}"
        ),
        clf::IllegalTokenMaskingOutcome::NoLegalTokens => {
            String::from("FALSIFY-CRUX-C-10-001 masking: legal_mask has no legal positions")
        }
        clf::IllegalTokenMaskingOutcome::IllegalTokenNotMasked { token_index, logit } => format!(
            "FALSIFY-CRUX-C-10-001 masking: illegal token at idx {token_index} has logit={logit} (expected -Infinity)"
        ),
    };
    Some(reason)
}
/// Prints the lint report for one observation file to stdout.
///
/// Each outcome is rendered with its `Debug` formatting. When `json` is true
/// the report is a pretty-printed JSON object with the keys
/// `observation_path`, `json`, `diagnostic` and `masking`; otherwise it is a
/// header line followed by one line per classifier, with skipped classifiers
/// shown via `print_line`'s placeholder text.
fn print_report(
    path: &Path,
    json_out: Option<&clf::JsonGrammarOutputOutcome>,
    err_diag: Option<&clf::GrammarErrorDiagnosticOutcome>,
    masking: Option<&clf::IllegalTokenMaskingOutcome>,
    json: bool,
) {
    // Render every outcome once; both output modes consume the same strings.
    let json_text = json_out.map(|outcome| format!("{outcome:?}"));
    let diag_text = err_diag.map(|outcome| format!("{outcome:?}"));
    let mask_text = masking.map(|outcome| format!("{outcome:?}"));

    if !json {
        println!("gbnf-lint report for {}", path.display());
        print_line(" json: ", json_text);
        print_line(" diagnostic: ", diag_text);
        print_line(" masking: ", mask_text);
        return;
    }

    let report = serde_json::json!({
        "observation_path": path.display().to_string(),
        "json": json_text,
        "diagnostic": diag_text,
        "masking": mask_text,
    });
    // Fall back to the compact form if pretty-printing fails.
    let rendered = match serde_json::to_string_pretty(&report) {
        Ok(pretty) => pretty,
        Err(_) => report.to_string(),
    };
    println!("{rendered}");
}
/// Prints one report line: `prefix` followed by the value, or by a
/// placeholder noting that the classifier was skipped when `v` is `None`.
fn print_line(prefix: &str, v: Option<String>) {
    let text = v.unwrap_or_else(|| String::from("(missing fields — classifier skipped)"));
    println!("{prefix}{text}");
}