use serde::{Deserialize, Serialize};
use serde_json::Value;
use serde_json_path::JsonPath;
/// How strongly a suggested replacement path is believed to be the intended one.
///
/// Serialized and deserialized in lowercase (`high`, `medium`, `low`) because of
/// the `rename_all = "lowercase"` serde attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ShapeConfidence {
/// Assigned when the expected path resolves under a single top-level wrapper key.
High,
/// Assigned when the expected path resolves under two wrapper keys, or after
/// dropping a leading prefix of the expected path.
Medium,
/// Lowest level. NOTE(review): no code visible in this file assigns it —
/// confirm whether other callers construct it.
Low,
}
impl ShapeConfidence {
fn rank(self) -> u8 {
match self {
ShapeConfidence::High => 2,
ShapeConfidence::Medium => 1,
ShapeConfidence::Low => 0,
}
}
}
/// A single suggested replacement for a path that did not match the observed body.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CandidateFix {
/// Suggested JSONPath expression (rooted at `$`).
pub path: String,
/// How likely this suggestion is to be the intended path.
pub confidence: ShapeConfidence,
/// Human-readable explanation of why this path was suggested.
pub reason: String,
}
/// Result of comparing an expected JSONPath against the body that was actually observed.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ShapeMismatchDiagnosis {
/// The path the caller expected to resolve, copied verbatim from the input.
pub expected_path: String,
/// Keys of the observed body when it is a JSON object; empty otherwise.
pub observed_keys: Vec<String>,
/// JSON type name of the observed body (`"object"`, `"array"`, `"null"`, ...).
pub observed_type: String,
/// Suggested replacement paths, best first.
pub candidate_fixes: Vec<CandidateFix>,
/// `true` when at least one entry in `candidate_fixes` has `High` confidence.
pub high_confidence: bool,
}
/// Upper bound on the number of candidate fixes returned in one diagnosis.
const MAX_CANDIDATES: usize = 5;
/// Describes the observed JSON body and suggests paths that might have been
/// meant instead of `expected_path`.
///
/// The observed type and top-level keys are always reported. Candidate fixes
/// are only computed for "simple" paths (see `is_simple_path`); any other path
/// yields an empty candidate list and `high_confidence == false`.
pub fn diagnose(expected_path: &str, observed: &Value) -> ShapeMismatchDiagnosis {
    let candidate_fixes = is_simple_path(expected_path)
        .then(|| build_candidates(&parse_segments(expected_path), observed))
        .unwrap_or_default();

    ShapeMismatchDiagnosis {
        expected_path: expected_path.to_owned(),
        observed_keys: top_level_keys(observed),
        observed_type: value_type(observed).to_owned(),
        // Evaluated before `candidate_fixes` is moved into the struct below.
        high_confidence: candidate_fixes
            .iter()
            .any(|fix| matches!(fix.confidence, ShapeConfidence::High)),
        candidate_fixes,
    }
}
/// Returns the JSON type name of `value` as a static string.
fn value_type(value: &Value) -> &'static str {
    match *value {
        // Containers first, then scalars.
        Value::Object(_) => "object",
        Value::Array(_) => "array",
        Value::String(_) => "string",
        Value::Number(_) => "number",
        Value::Bool(_) => "boolean",
        Value::Null => "null",
    }
}
fn top_level_keys(value: &Value) -> Vec<String> {
match value {
Value::Object(map) => map.keys().cloned().collect(),
_ => Vec::new(),
}
}
/// Reports whether `path` is a "simple" JSONPath this module can rewrite.
///
/// A simple path:
/// - starts with the root marker `$`;
/// - contains no recursive descent (`..`), wildcard (`*`) or filter (`?`);
/// - uses brackets only for non-empty, all-digit index selectors such as `[0]`,
///   and every bracket is properly closed.
///
/// Unterminated brackets (`$.items[0`) and stray closing brackets (`$.items]`)
/// are rejected instead of being treated as valid.
fn is_simple_path(path: &str) -> bool {
    if !path.starts_with('$') {
        return false;
    }
    if path.contains("..") || path.contains('*') || path.contains('?') {
        return false;
    }
    let mut chars = path.chars();
    // `while let` rather than `for`: the bracket arm consumes from the same iterator.
    while let Some(c) = chars.next() {
        match c {
            '[' => {
                let mut closed = false;
                let mut digit_count = 0_usize;
                for d in chars.by_ref() {
                    if d == ']' {
                        closed = true;
                        break;
                    }
                    if !d.is_ascii_digit() {
                        return false;
                    }
                    digit_count += 1;
                }
                // Reject `[]` as well as a bracket that runs off the end of the path.
                if !closed || digit_count == 0 {
                    return false;
                }
            }
            // A `]` seen here has no matching `[`.
            ']' => return false,
            _ => {}
        }
    }
    true
}
/// Splits a dotted path into its segments.
///
/// A leading `$` and any dots that follow it are removed first; the remainder
/// is split on `.`. Index suffixes stay attached to their segment
/// (`items[0]`). A path with nothing after the root yields an empty list.
fn parse_segments(path: &str) -> Vec<String> {
    let body = path
        .strip_prefix('$')
        .unwrap_or(path)
        .trim_start_matches('.');
    match body {
        "" => Vec::new(),
        rest => rest.split('.').map(str::to_owned).collect(),
    }
}
/// Renders `segments` as a JSONPath rooted at `$`.
///
/// Name segments are appended with a leading dot (`$.a.b`). A segment that is
/// itself a bracket selector (starts with `[`, e.g. `[0]`) is attached directly
/// to what precedes it, so `["[0]", "id"]` becomes `$[0].id` rather than the
/// malformed `$.[0].id`. An empty slice renders as the bare root `$`.
fn segments_to_path(segments: &[&str]) -> String {
    let mut path = String::from("$");
    for segment in segments {
        if !segment.starts_with('[') {
            path.push('.');
        }
        path.push_str(segment);
    }
    path
}
/// Returns `true` when `candidate` parses as a JSONPath and selects at least
/// one node in `observed`. A path that fails to parse counts as "does not exist".
fn path_exists(candidate: &str, observed: &Value) -> bool {
    let Ok(parsed) = JsonPath::parse(candidate) else {
        return false;
    };
    !parsed.query(observed).all().is_empty()
}
fn build_candidates(segments: &[String], observed: &Value) -> Vec<CandidateFix> {
if segments.is_empty() {
return Vec::new();
}
let Some(obj) = observed.as_object() else {
return Vec::new();
};
let mut raw: Vec<(CandidateFix, u32)> = Vec::new();
let mut seen_paths = std::collections::HashSet::new();
let full_suffix: Vec<&str> = segments.iter().map(String::as_str).collect();
for (key, val) in obj {
if !val.is_object() {
continue;
}
let mut wrapped = vec![key.as_str()];
wrapped.extend(full_suffix.iter().copied());
let candidate = segments_to_path(&wrapped);
if !seen_paths.insert(candidate.clone()) {
continue;
}
if path_exists(&candidate, observed) {
raw.push((
CandidateFix {
path: candidate,
confidence: ShapeConfidence::High,
reason: format!(
"expected path is present under observed top-level key `{}`",
key
),
},
1,
));
}
}
if raw.is_empty() {
for (key1, val1) in obj {
let Some(inner) = val1.as_object() else {
continue;
};
for (key2, val2) in inner {
if !val2.is_object() {
continue;
}
let mut wrapped = vec![key1.as_str(), key2.as_str()];
wrapped.extend(full_suffix.iter().copied());
let candidate = segments_to_path(&wrapped);
if !seen_paths.insert(candidate.clone()) {
continue;
}
if path_exists(&candidate, observed) {
raw.push((
CandidateFix {
path: candidate,
confidence: ShapeConfidence::Medium,
reason: format!(
"expected path is present under observed keys `{}.{}`",
key1, key2
),
},
2,
));
}
}
}
}
for drop_count in 1..segments.len() {
let tail: Vec<&str> = segments[drop_count..].iter().map(String::as_str).collect();
if tail.is_empty() {
continue;
}
let candidate = segments_to_path(&tail);
if !seen_paths.insert(candidate.clone()) {
continue;
}
if path_exists(&candidate, observed) {
let dropped: Vec<&str> = segments[..drop_count].iter().map(String::as_str).collect();
raw.push((
CandidateFix {
path: candidate,
confidence: ShapeConfidence::Medium,
reason: format!(
"observed body matches path when prefix `{}` is dropped",
dropped.join(".")
),
},
(drop_count as u32) + 10,
));
}
}
raw.sort_by(|a, b| {
b.0.confidence
.rank()
.cmp(&a.0.confidence.rank())
.then(a.1.cmp(&b.1))
.then(a.0.path.cmp(&b.0.path))
});
raw.into_iter()
.map(|(c, _)| c)
.take(MAX_CANDIDATES)
.collect()
}
/// Unit tests for the shape-mismatch diagnosis helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn leaf_under_top_level_object_is_high_confidence() {
        let payload = json!({"request": {"uuid": "abc"}, "stageStatus": "pending"});
        let diagnosis = diagnose("$.uuid", &payload);

        assert_eq!(diagnosis.observed_type, "object");
        assert!(diagnosis.high_confidence);

        let best = &diagnosis.candidate_fixes[0];
        assert_eq!(best.path, "$.request.uuid");
        assert_eq!(best.confidence, ShapeConfidence::High);

        for expected_key in ["request", "stageStatus"].iter() {
            assert!(diagnosis.observed_keys.iter().any(|k| k == expected_key));
        }
    }

    #[test]
    fn no_match_anywhere_returns_no_candidates() {
        let payload = json!({"foo": 1, "bar": "baz"});
        let diagnosis = diagnose("$.uuid", &payload);

        assert!(diagnosis.candidate_fixes.is_empty());
        assert!(!diagnosis.high_confidence);
    }

    #[test]
    fn prefix_drop_when_parent_segment_is_missing() {
        let payload = json!({"items": [{"id": "x"}]});
        let diagnosis = diagnose("$.data.items[0].id", &payload);

        assert!(!diagnosis.candidate_fixes.is_empty());
        let best = &diagnosis.candidate_fixes[0];
        assert_eq!(best.path, "$.items[0].id");
        assert_eq!(best.confidence, ShapeConfidence::Medium);
    }

    #[test]
    fn depth_two_wrap_is_medium_confidence() {
        let payload = json!({"envelope": {"data": {"uuid": "u"}}});
        let diagnosis = diagnose("$.uuid", &payload);

        assert!(!diagnosis.candidate_fixes.is_empty());
        let best = &diagnosis.candidate_fixes[0];
        assert_eq!(best.path, "$.envelope.data.uuid");
        assert_eq!(best.confidence, ShapeConfidence::Medium);
    }

    #[test]
    fn array_observed_body_yields_no_candidates() {
        let payload = json!([{"uuid": "x"}]);
        let diagnosis = diagnose("$.uuid", &payload);

        assert_eq!(diagnosis.observed_type, "array");
        assert!(diagnosis.observed_keys.is_empty());
        assert!(diagnosis.candidate_fixes.is_empty());
    }

    #[test]
    fn scalar_observed_body_yields_no_candidates() {
        let payload = json!("hello");
        let diagnosis = diagnose("$.uuid", &payload);

        assert_eq!(diagnosis.observed_type, "string");
        assert!(diagnosis.candidate_fixes.is_empty());
    }

    #[test]
    fn null_body_reports_null_type_and_no_candidates() {
        let payload = json!(null);
        let diagnosis = diagnose("$.uuid", &payload);

        assert_eq!(diagnosis.observed_type, "null");
        assert!(diagnosis.candidate_fixes.is_empty());
        assert!(!diagnosis.high_confidence);
    }

    #[test]
    fn filter_expression_paths_are_out_of_scope() {
        let payload = json!({"items": [{"id": 1, "ok": true}]});
        let diagnosis = diagnose("$.items[?(@.ok)].id", &payload);

        assert_eq!(diagnosis.observed_type, "object");
        assert!(diagnosis.observed_keys.iter().any(|k| k == "items"));
        assert!(diagnosis.candidate_fixes.is_empty());
    }

    #[test]
    fn candidates_are_capped_at_five() {
        // Six wrappers all contain the leaf, one more than the cap.
        let wrappers: serde_json::Map<String, Value> = (0..6)
            .map(|i| (format!("env{}", i), json!({ "uuid": "x" })))
            .collect();
        let payload = Value::Object(wrappers);
        let diagnosis = diagnose("$.uuid", &payload);

        assert_eq!(diagnosis.candidate_fixes.len(), MAX_CANDIDATES);
        for fix in &diagnosis.candidate_fixes {
            assert!(fix.confidence == ShapeConfidence::High);
        }
    }

    #[test]
    fn candidate_reason_names_the_wrapping_key() {
        let payload = json!({"request": {"uuid": "x"}});
        let diagnosis = diagnose("$.uuid", &payload);

        let best = &diagnosis.candidate_fixes[0];
        assert!(
            best.reason.contains("request"),
            "reason should name the wrapping key, got: {}",
            best.reason
        );
    }

    #[test]
    fn path_that_does_not_start_with_dollar_is_not_simple() {
        for path in ["foo.bar", ".uuid"].iter() {
            assert!(!is_simple_path(path));
        }
    }

    #[test]
    fn bracket_filters_mark_path_as_non_simple() {
        for rejected in ["$.items[?(@.ok)]", "$..uuid", "$.items[*]"].iter() {
            assert!(!is_simple_path(rejected));
        }
        for accepted in ["$.items[0].id", "$.uuid"].iter() {
            assert!(is_simple_path(accepted));
        }
    }
}