Skip to main content

alint_rules/
structured_path.rs

1//! Structured-query rule family: `{json,yaml,toml}_path_{equals,matches}`.
2//!
3//! Six rule kinds share a single implementation that varies
4//! along two axes:
5//!
6//! - **Format** — `Json`, `Yaml`, or `Toml`. The file is parsed
7//!   into a `serde_json::Value` tree regardless (YAML and TOML
8//!   values coerce through serde), so the `JSONPath` engine only
9//!   has to reason about one tree shape.
10//! - **Op** — `Equals(value)` for exact equality or
11//!   `Matches(regex)` for regex on string values.
12//!
13//! All rule kinds require:
14//!
15//! - `paths` — which files to scan.
16//! - `path` — a `JSONPath` expression (RFC 9535) pointing at the
17//!   values to check.
18//! - Either `equals` (arbitrary YAML value) or `matches`
19//!   (regex string), according to the rule kind.
20//!
21//! ## Semantics
22//!
23//! `JSONPath` can return multiple matches (`$.deps[*].version`).
24//! Every match must satisfy the op; any single mismatch
25//! produces a violation at that match's location. If the query
26//! returns zero matches, that's one "path not found" violation
27//! — the option the user is enforcing doesn't exist.
28//!
29//! The optional **`if_present: true`** flag flips the zero-match
30//! case: under it, zero matches are silently OK, and only
31//! actual matches that fail the op produce violations. Useful
32//! for predicates that only apply when a field is present —
33//! e.g. "every `uses:` in a GitHub Actions workflow must be
34//! pinned to a commit SHA" (a workflow with only `run:` steps
35//! has no `uses:` at all and shouldn't be flagged).
36//!
//! Unparseable files (bad JSON / YAML / TOML) produce one
//! violation per file. An unparseable file is a syntax
//! problem, not the structured rule's concern — but better to
//! surface it than silently skip.
41
42use std::io::Read;
43
44use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
45use regex::Regex;
46use serde::Deserialize;
47use serde_json::Value;
48use serde_json_path::JsonPath;
49
/// Which parser to use on the target file.
///
/// Whatever the variant, the parsed document ends up as a
/// `serde_json::Value` tree, so the rest of the rule only has
/// to handle one tree shape.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// Parse the file as JSON.
    Json,
    /// Parse the file as YAML.
    Yaml,
    /// Parse the file as TOML.
    Toml,
}
57
58impl Format {
59    pub(crate) fn parse(self, text: &str) -> std::result::Result<Value, String> {
60        match self {
61            Self::Json => serde_json::from_str(text).map_err(|e| e.to_string()),
62            Self::Yaml => serde_yaml_ng::from_str(text).map_err(|e| e.to_string()),
63            Self::Toml => toml::from_str(text).map_err(|e| e.to_string()),
64        }
65    }
66
67    pub(crate) fn label(self) -> &'static str {
68        match self {
69            Self::Json => "JSON",
70            Self::Yaml => "YAML",
71            Self::Toml => "TOML",
72        }
73    }
74
75    /// Detect the format from a path's extension. Returns `None`
76    /// for unknown extensions; callers decide how to fall back
77    /// (require an explicit `format:` override, default to JSON,
78    /// emit a per-file violation, etc).
79    pub(crate) fn detect_from_path(path: &std::path::Path) -> Option<Self> {
80        match path.extension()?.to_str()? {
81            "json" => Some(Self::Json),
82            "yaml" | "yml" => Some(Self::Yaml),
83            "toml" => Some(Self::Toml),
84            _ => None,
85        }
86    }
87}
88
89/// Comparison op — keeps the rule builders thin.
90#[derive(Debug)]
91pub enum Op {
92    /// Value at `path` must serialize-compare equal to this
93    /// literal. Any JSON-representable value works (bool,
94    /// number, string, array, object, null).
95    Equals(Value),
96    /// Value at `path` must be a string that the regex matches.
97    /// A non-string match produces a violation with a clear
98    /// `expected string, got <kind>` message.
99    Matches(Regex),
100}
101
102// ---------------------------------------------------------------
103// Options — deserialized from the rule spec's `extra` map.
104// ---------------------------------------------------------------
105
106/// Options shared by every `*_path_equals` rule kind.
107#[derive(Debug, Deserialize)]
108struct EqualsOptions {
109    path: String,
110    equals: Value,
111    #[serde(default)]
112    if_present: bool,
113}
114
115/// Options shared by every `*_path_matches` rule kind.
116#[derive(Debug, Deserialize)]
117struct MatchesOptions {
118    path: String,
119    matches: String,
120    #[serde(default)]
121    if_present: bool,
122}
123
124// ---------------------------------------------------------------
125// Rule
126// ---------------------------------------------------------------
127
128#[derive(Debug)]
129pub struct StructuredPathRule {
130    id: String,
131    level: Level,
132    policy_url: Option<String>,
133    message: Option<String>,
134    scope: Scope,
135    format: Format,
136    path_expr: JsonPath,
137    path_src: String,
138    op: Op,
139    /// When `true`, a `JSONPath` query that produces zero matches
140    /// is silently OK. When `false` (default), a zero-match query
141    /// is reported as a single violation — the "value being
142    /// enforced doesn't exist" case. Use `true` for predicates
143    /// that are conditional on the field being present (e.g.
144    /// "every `uses:` in a workflow must be SHA-pinned" — a
145    /// workflow with no `uses:` at all shouldn't be flagged).
146    if_present: bool,
147}
148
149impl Rule for StructuredPathRule {
150    fn id(&self) -> &str {
151        &self.id
152    }
153    fn level(&self) -> Level {
154        self.level
155    }
156    fn policy_url(&self) -> Option<&str> {
157        self.policy_url.as_deref()
158    }
159
160    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
161        let mut violations = Vec::new();
162        for entry in ctx.index.files() {
163            if !self.scope.matches(&entry.path) {
164                continue;
165            }
166            let full = ctx.root.join(&entry.path);
167            let Ok(text) = read_to_string(&full) else {
168                // permission / race — silent skip, like other
169                // content rules
170                continue;
171            };
172            let root_value = match self.format.parse(&text) {
173                Ok(v) => v,
174                Err(err) => {
175                    violations.push(
176                        Violation::new(format!(
177                            "not a valid {} document: {err}",
178                            self.format.label()
179                        ))
180                        .with_path(&entry.path),
181                    );
182                    continue;
183                }
184            };
185            let matches = self.path_expr.query(&root_value);
186            if matches.is_empty() {
187                if self.if_present {
188                    continue;
189                }
190                let msg = self
191                    .message
192                    .clone()
193                    .unwrap_or_else(|| format!("JSONPath `{}` produced no match", self.path_src));
194                violations.push(Violation::new(msg).with_path(&entry.path));
195                continue;
196            }
197            for m in matches.iter() {
198                if let Some(v) = check_match(m, &self.op) {
199                    let base = self.message.clone().unwrap_or(v);
200                    violations.push(Violation::new(base).with_path(&entry.path));
201                }
202            }
203        }
204        Ok(violations)
205    }
206}
207
208/// Return `Some(message)` if the match fails the op; `None` if it passes.
209fn check_match(m: &Value, op: &Op) -> Option<String> {
210    match op {
211        Op::Equals(expected) => {
212            if m == expected {
213                None
214            } else {
215                Some(format!(
216                    "value at path does not equal expected: expected {}, got {}",
217                    short_render(expected),
218                    short_render(m),
219                ))
220            }
221        }
222        Op::Matches(re) => {
223            let Some(s) = m.as_str() else {
224                return Some(format!(
225                    "value at path is not a string (got {}), can't apply regex",
226                    kind_name(m)
227                ));
228            };
229            if re.is_match(s) {
230                None
231            } else {
232                Some(format!(
233                    "value at path {} does not match regex {}",
234                    short_render(m),
235                    re.as_str(),
236                ))
237            }
238        }
239    }
240}
241
242/// A stable, short rendering for error messages. Avoids
243/// dumping a whole object when the mismatch is on a sub-key.
244fn short_render(v: &Value) -> String {
245    let raw = v.to_string();
246    if raw.len() <= 80 {
247        raw
248    } else {
249        format!("{}…", &raw[..80])
250    }
251}
252
253fn kind_name(v: &Value) -> &'static str {
254    match v {
255        Value::Null => "null",
256        Value::Bool(_) => "bool",
257        Value::Number(_) => "number",
258        Value::String(_) => "string",
259        Value::Array(_) => "array",
260        Value::Object(_) => "object",
261    }
262}
263
/// Read the whole file at `path` into a `String`.
///
/// Fails with the underlying I/O error when the file cannot be
/// opened or read, or when its contents are not valid UTF-8.
fn read_to_string(path: &std::path::Path) -> std::io::Result<String> {
    let mut contents = String::new();
    std::fs::File::open(path)?.read_to_string(&mut contents)?;
    Ok(contents)
}
270
271// ---------------------------------------------------------------
272// Builders
273//
// Six thin wrappers, one per (Format, Op) combination. Each
// consumes the spec, validates the structured-query options,
// and constructs the shared `StructuredPathRule`.
277// ---------------------------------------------------------------
278
279pub fn json_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
280    build_equals(spec, Format::Json, "json_path_equals")
281}
282
283pub fn json_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
284    build_matches(spec, Format::Json, "json_path_matches")
285}
286
287pub fn yaml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
288    build_equals(spec, Format::Yaml, "yaml_path_equals")
289}
290
291pub fn yaml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
292    build_matches(spec, Format::Yaml, "yaml_path_matches")
293}
294
295pub fn toml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
296    build_equals(spec, Format::Toml, "toml_path_equals")
297}
298
299pub fn toml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
300    build_matches(spec, Format::Toml, "toml_path_matches")
301}
302
303fn build_equals(spec: &RuleSpec, format: Format, kind_label: &str) -> Result<Box<dyn Rule>> {
304    let paths = spec.paths.as_ref().ok_or_else(|| {
305        Error::rule_config(&spec.id, format!("{kind_label} requires a `paths` field"))
306    })?;
307    let opts: EqualsOptions = spec
308        .deserialize_options()
309        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
310    let path_expr = JsonPath::parse(&opts.path).map_err(|e| {
311        Error::rule_config(&spec.id, format!("invalid JSONPath {:?}: {e}", opts.path))
312    })?;
313    Ok(Box::new(StructuredPathRule {
314        id: spec.id.clone(),
315        level: spec.level,
316        policy_url: spec.policy_url.clone(),
317        message: spec.message.clone(),
318        scope: Scope::from_paths_spec(paths)?,
319        format,
320        path_expr,
321        path_src: opts.path,
322        op: Op::Equals(opts.equals),
323        if_present: opts.if_present,
324    }))
325}
326
327fn build_matches(spec: &RuleSpec, format: Format, kind_label: &str) -> Result<Box<dyn Rule>> {
328    let paths = spec.paths.as_ref().ok_or_else(|| {
329        Error::rule_config(&spec.id, format!("{kind_label} requires a `paths` field"))
330    })?;
331    let opts: MatchesOptions = spec
332        .deserialize_options()
333        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
334    let path_expr = JsonPath::parse(&opts.path).map_err(|e| {
335        Error::rule_config(&spec.id, format!("invalid JSONPath {:?}: {e}", opts.path))
336    })?;
337    let re = Regex::new(&opts.matches).map_err(|e| {
338        Error::rule_config(&spec.id, format!("invalid regex {:?}: {e}", opts.matches))
339    })?;
340    Ok(Box::new(StructuredPathRule {
341        id: spec.id.clone(),
342        level: spec.level,
343        policy_url: spec.policy_url.clone(),
344        message: spec.message.clone(),
345        scope: Scope::from_paths_spec(paths)?,
346        format,
347        path_expr,
348        path_src: opts.path,
349        op: Op::Matches(re),
350        if_present: opts.if_present,
351    }))
352}
353
#[cfg(test)]
mod tests {
    //! Unit tests for the structured-path rule family: builder
    //! validation errors first, then evaluation per format, then
    //! the unparseable-file path.

    use super::*;
    use crate::test_support::{ctx, spec_yaml, tempdir_with_files};

    // ─── build-path errors ────────────────────────────────────

    #[test]
    fn build_rejects_missing_paths() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             path: \"$.name\"\n\
             equals: \"x\"\n\
             level: error\n",
        );
        assert!(json_path_equals_build(&spec).is_err());
    }

    #[test]
    fn build_rejects_invalid_jsonpath() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$..[invalid\"\n\
             equals: \"x\"\n\
             level: error\n",
        );
        assert!(json_path_equals_build(&spec).is_err());
    }

    #[test]
    fn build_rejects_invalid_regex_in_matches() {
        // The bad regex has to be supplied under the `matches`
        // key: under any other key the build fails earlier on
        // the missing `matches` option and regex validation is
        // never exercised.
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_matches\n\
             paths: \"package.json\"\n\
             path: \"$.version\"\n\
             matches: \"[unterminated\"\n\
             level: error\n",
        );
        assert!(json_path_matches_build(&spec).is_err());
    }

    // ─── json_path_equals ─────────────────────────────────────

    #[test]
    fn json_path_equals_passes_when_value_matches() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$.name\"\n\
             equals: \"demo\"\n\
             level: error\n",
        );
        let rule = json_path_equals_build(&spec).unwrap();
        let (tmp, idx) =
            tempdir_with_files(&[("package.json", br#"{"name":"demo","version":"1.0.0"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "matching value should pass: {v:?}");
    }

    #[test]
    fn json_path_equals_fires_on_mismatch() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$.name\"\n\
             equals: \"demo\"\n\
             level: error\n",
        );
        let rule = json_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"name":"other"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
    }

    #[test]
    fn json_path_equals_fires_on_missing_path() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$.name\"\n\
             equals: \"demo\"\n\
             level: error\n",
        );
        let rule = json_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"1.0"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1, "missing path should fire");
    }

    #[test]
    fn json_path_if_present_silent_on_missing() {
        // `if_present: true` → missing path is silent.
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$.name\"\n\
             equals: \"demo\"\n\
             if_present: true\n\
             level: error\n",
        );
        let rule = json_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"1.0"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "if_present should silence: {v:?}");
    }

    // ─── json_path_matches ────────────────────────────────────

    #[test]
    fn json_path_matches_passes_on_pattern_hit() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_matches\n\
             paths: \"package.json\"\n\
             path: \"$.version\"\n\
             matches: \"^\\\\d+\\\\.\\\\d+\\\\.\\\\d+$\"\n\
             level: error\n",
        );
        let rule = json_path_matches_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"1.2.3"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "matching version should pass: {v:?}");
    }

    #[test]
    fn json_path_matches_fires_on_pattern_miss() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_matches\n\
             paths: \"package.json\"\n\
             path: \"$.version\"\n\
             matches: \"^\\\\d+\\\\.\\\\d+\\\\.\\\\d+$\"\n\
             level: error\n",
        );
        let rule = json_path_matches_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"v1.x"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
    }

    // ─── yaml_path_* ─────────────────────────────────────────

    #[test]
    fn yaml_path_equals_passes_when_value_matches() {
        let spec = spec_yaml(
            "id: t\n\
             kind: yaml_path_equals\n\
             paths: \".github/workflows/*.yml\"\n\
             path: \"$.name\"\n\
             equals: \"CI\"\n\
             level: error\n",
        );
        let rule = yaml_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[(
            ".github/workflows/ci.yml",
            b"name: CI\non: push\njobs: {}\n",
        )]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "matching name should pass: {v:?}");
    }

    #[test]
    fn yaml_path_matches_uses_bracket_notation_for_dashed_keys() {
        // Bracket notation (`$.foo['some-key']`) is the form
        // needed for dashed YAML keys, which the JSONPath
        // dot-form can't express. This checks that a
        // bracket-notation selector resolves against a YAML
        // document.
        let spec = spec_yaml(
            "id: t\n\
             kind: yaml_path_matches\n\
             paths: \"action.yml\"\n\
             path: \"$.runs['using']\"\n\
             matches: \"^node\\\\d+$\"\n\
             level: error\n",
        );
        let rule = yaml_path_matches_build(&spec).unwrap();
        let (tmp, idx) =
            tempdir_with_files(&[("action.yml", b"runs:\n  using: node20\n  main: index.js\n")]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "bracket notation should match: {v:?}");
    }

    // ─── toml_path_* ─────────────────────────────────────────

    #[test]
    fn toml_path_equals_passes_when_value_matches() {
        let spec = spec_yaml(
            "id: t\n\
             kind: toml_path_equals\n\
             paths: \"Cargo.toml\"\n\
             path: \"$.package.edition\"\n\
             equals: \"2024\"\n\
             level: error\n",
        );
        let rule = toml_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[(
            "Cargo.toml",
            b"[package]\nname = \"x\"\nedition = \"2024\"\n",
        )]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "matching edition should pass: {v:?}");
    }

    #[test]
    fn toml_path_matches_fires_on_floating_version() {
        // Common policy: deps must be tilde-pinned, not bare.
        let spec = spec_yaml(
            "id: t\n\
             kind: toml_path_matches\n\
             paths: \"Cargo.toml\"\n\
             path: \"$.dependencies.serde\"\n\
             matches: \"^[~=]\"\n\
             level: error\n",
        );
        let rule = toml_path_matches_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[(
            "Cargo.toml",
            b"[package]\nname = \"x\"\n[dependencies]\nserde = \"1\"\n",
        )]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1, "floating `serde = \"1\"` should fire");
    }

    // ─── parse error path ─────────────────────────────────────

    #[test]
    fn evaluate_fires_on_malformed_input() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$.name\"\n\
             equals: \"x\"\n\
             level: error\n",
        );
        let rule = json_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", b"{not valid json")]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1, "malformed JSON should fire one violation");
    }
}