Skip to main content

alint_core/
jsonpath_diagnostics.rs

1//! Domain-specific hints for `JSONPath` parse errors.
2//!
3//! `serde_json_path` is RFC 9535-compliant. The authoring mistake
4//! the P2a launch-prep validation pass surfaced and confirmed against
5//! `serde_json_path` 0.7.x is:
6//!
7//! - **§ 10** — dashed keys after a `.` segment (e.g.
8//!   `$.package-name` or `$.foo[?@.dashed-key == 'x']`) are rejected;
//!   bracket notation (`$['package-name']`, `$.foo[?@['dashed-key'] == 'x']`)
10//!   is required by the spec. This applies in both top-level path
11//!   segments and inside filter expressions.
12//!
13//! (A previously-suspected pitfall — outer parentheses on filter
14//! predicates `$.foo[?(@.bar == 'baz')]` — was investigated during
15//! Phase 4 and confirmed valid against `serde_json_path` 0.7.2; the
16//! original report mis-attributed a dashed-key error to the parens.
17//! See the v0.9.15 Phase 4 commit for the test that proved it.)
18//!
19//! The raw `serde_json_path` error message says *what* failed
20//! (lexical / syntax) but not *why*. This helper inspects the source
21//! path for the dashed-key pattern and appends a hint pointing at the
22//! canonical-correct form.
23
24use std::sync::OnceLock;
25
26use regex::Regex;
27
28fn dashed_after_dot_re() -> &'static Regex {
29    static RE: OnceLock<Regex> = OnceLock::new();
30    RE.get_or_init(|| {
31        // `.` followed by an identifier-start that contains a dash
32        // before the next segment boundary. Catches `$.foo-bar`,
33        // `$.a.foo-bar.b`, `$['x'].foo-bar`, etc.
34        Regex::new(r"\.([A-Za-z_][A-Za-z0-9_]*-[A-Za-z0-9_-]+)").expect("static regex")
35    })
36}
37
38/// Inspect a `JSONPath` source string for known-bad patterns and
39/// return a hint string when one applies.
40///
41/// Returns `None` for paths that pass the inspection — callers should
42/// surface the raw `serde_json_path` error unchanged in that case.
43pub fn diagnose_path(path: &str) -> Option<String> {
44    // Pitfall #10: dashed key after `.` segment (in any position —
45    // top-level or inside a filter). RFC 9535 dot-notation requires
46    // identifier-shape keys; dashed keys must use bracket notation.
47    if let Some(cap) = dashed_after_dot_re().captures(path)
48        && let Some(key_match) = cap.get(1)
49    {
50        let key = key_match.as_str();
51        return Some(format!(
52            "JSONPath dot-notation requires identifier-shape keys (RFC 9535). For dashed keys, use \
53             bracket notation: `$['{key}']` instead of `$.{key}` (or `@['{key}']` instead of \
54             `@.{key}` inside a filter). See `docs/development/CONFIG-AUTHORING.md` § 10.",
55        ));
56    }
57
58    None
59}
60
61/// Build a complete error-message string for a `JSONPath` parse
62/// failure: `invalid JSONPath "<path>": <serde_json_path err>` plus
63/// any domain-specific hint from [`diagnose_path`].
64pub fn format_parse_error(path: &str, err: impl std::fmt::Display) -> String {
65    let base = format!("invalid JSONPath {path:?}: {err}");
66    match diagnose_path(path) {
67        Some(hint) => format!("{base}\n  hint: {hint}"),
68        None => base,
69    }
70}
71
#[cfg(test)]
mod tests {
    use super::*;

    /// Diagnose `path`, failing the calling test when no hint is
    /// produced.
    #[track_caller]
    fn hint_for(path: &str) -> String {
        diagnose_path(path).expect("should diagnose")
    }

    #[test]
    fn dashed_key_inside_filter_gets_hint() {
        // This failure was first blamed on the outer parentheses, but
        // `serde_json_path` 0.7.x accepts parenthesised filters; the
        // actual culprit was the dashed `package-ecosystem` key. The
        // hint must fire regardless and steer towards bracket
        // notation.
        let hint = hint_for("$.updates[?(@.package-ecosystem == 'github-actions')]");
        assert!(hint.contains("$['package-ecosystem']"), "hint: {hint}");
        assert!(
            hint.contains("@['package-ecosystem']"),
            "should mention filter form: {hint}",
        );
    }

    #[test]
    fn dashed_key_after_dot_gets_hint() {
        let hint = hint_for("$.package-name");
        assert!(hint.contains("$['package-name']"), "hint: {hint}");
        assert!(hint.contains("§ 10"), "hint: {hint}");
    }

    #[test]
    fn dashed_key_in_middle_path_gets_hint() {
        let hint = hint_for("$.foo.dashed-key.bar");
        assert!(hint.contains("$['dashed-key']"), "hint: {hint}");
    }

    #[test]
    fn already_correct_bracket_notation_no_hint() {
        assert!(diagnose_path("$['package-name']").is_none());
    }

    #[test]
    fn correct_filter_no_hint() {
        assert!(diagnose_path("$.updates[?@.bar == 'baz']").is_none());
    }

    #[test]
    fn outer_parens_alone_no_hint() {
        // Parenthesised filter predicates are valid for
        // `serde_json_path` 0.7.x, so there is nothing to report.
        assert!(diagnose_path("$.updates[?(@.bar == 'baz')]").is_none());
    }

    #[test]
    fn plain_dot_path_no_hint() {
        assert!(diagnose_path("$.package.edition").is_none());
    }

    #[test]
    fn format_parse_error_includes_hint_when_diagnosed() {
        let out = format_parse_error("$.foo-bar", "syntax error at column 7");
        for expected in ["invalid JSONPath", "syntax error", "hint:", "$['foo-bar']"] {
            assert!(out.contains(expected), "out: {out}");
        }
    }

    #[test]
    fn format_parse_error_no_hint_when_undiagnosed() {
        let out = format_parse_error("$.foo[", "unterminated bracket");
        for expected in ["invalid JSONPath", "unterminated bracket"] {
            assert!(out.contains(expected), "out: {out}");
        }
        assert!(!out.contains("hint:"), "out: {out}");
    }
}