Skip to main content

alint_rules/
structured_path.rs

1//! Structured-query rule family: `{json,yaml,toml}_path_{equals,matches}`.
2//!
3//! Six rule kinds share a single implementation that varies
4//! along two axes:
5//!
6//! - **Format** — `Json`, `Yaml`, or `Toml`. The file is parsed
7//!   into a `serde_json::Value` tree regardless (YAML and TOML
8//!   values coerce through serde), so the `JSONPath` engine only
9//!   has to reason about one tree shape.
10//! - **Op** — `Equals(value)` for exact equality or
11//!   `Matches(regex)` for regex on string values.
12//!
13//! All rule kinds require:
14//!
15//! - `paths` — which files to scan.
16//! - `path` — a `JSONPath` expression (RFC 9535) pointing at the
17//!   values to check.
18//! - Either `equals` (arbitrary YAML value) or `matches`
19//!   (regex string), according to the rule kind.
20//!
21//! ## Semantics
22//!
//! `JSONPath` can return multiple matches (`$.deps[*].version`).
//! Every match must satisfy the op; each match that fails it
//! produces its own violation, reported against the file. If the
//! query returns zero matches, that's one "path not found"
//! violation — the option the user is enforcing doesn't exist.
28//!
29//! The optional **`if_present: true`** flag flips the zero-match
30//! case: under it, zero matches are silently OK, and only
31//! actual matches that fail the op produce violations. Useful
32//! for predicates that only apply when a field is present —
33//! e.g. "every `uses:` in a GitHub Actions workflow must be
34//! pinned to a commit SHA" (a workflow with only `run:` steps
35//! has no `uses:` at all and shouldn't be flagged).
36//!
37//! Unparseable files (bad JSON / YAML / TOML) produce one
38//! violation per file. An unparseable file is a documentation
39//! problem, not the structured rule's concern — but better to
40//! surface it than silently skip.
41
42use std::path::Path;
43
44use alint_core::{Context, Error, Level, PerFileRule, Result, Rule, RuleSpec, Scope, Violation};
45use regex::Regex;
46use serde::Deserialize;
47use serde_json::Value;
48use serde_json_path::JsonPath;
49
/// Which structured-data parser to run on the target file.
///
/// The variant only selects the parser: `Format::parse` returns a
/// `serde_json::Value` for every variant, whatever the source syntax.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// Parsed with `serde_json`.
    Json,
    /// Parsed with `serde_yaml_ng`.
    Yaml,
    /// Parsed with the `toml` crate.
    Toml,
}
57
58impl Format {
59    pub(crate) fn parse(self, text: &str) -> std::result::Result<Value, String> {
60        match self {
61            Self::Json => serde_json::from_str(text).map_err(|e| e.to_string()),
62            Self::Yaml => serde_yaml_ng::from_str(text).map_err(|e| e.to_string()),
63            Self::Toml => toml::from_str(text).map_err(|e| e.to_string()),
64        }
65    }
66
67    pub(crate) fn label(self) -> &'static str {
68        match self {
69            Self::Json => "JSON",
70            Self::Yaml => "YAML",
71            Self::Toml => "TOML",
72        }
73    }
74
75    /// Detect the format from a path's extension. Returns `None`
76    /// for unknown extensions; callers decide how to fall back
77    /// (require an explicit `format:` override, default to JSON,
78    /// emit a per-file violation, etc).
79    pub(crate) fn detect_from_path(path: &std::path::Path) -> Option<Self> {
80        match path.extension()?.to_str()? {
81            "json" => Some(Self::Json),
82            "yaml" | "yml" => Some(Self::Yaml),
83            "toml" => Some(Self::Toml),
84            _ => None,
85        }
86    }
87}
88
/// Comparison op — keeps the rule builders thin.
#[derive(Debug)]
pub enum Op {
    /// Value at `path` must compare equal (`serde_json::Value`
    /// equality) to this literal. Any JSON-representable value
    /// works (bool, number, string, array, object, null).
    Equals(Value),
    /// Value at `path` must be a string that the regex matches.
    /// A non-string match produces a violation reading
    /// `value at path is not a string (got <kind>), can't apply regex`.
    Matches(Regex),
}
101
102// ---------------------------------------------------------------
103// Options — deserialized from the rule spec's `extra` map.
104// ---------------------------------------------------------------
105
/// Options shared by every `*_path_equals` rule kind.
#[derive(Debug, Deserialize)]
struct EqualsOptions {
    /// `JSONPath` expression source; compiled with `JsonPath::parse`
    /// when the rule is built.
    path: String,
    /// Literal every matched value is compared against.
    equals: Value,
    /// Whether a query with zero matches is accepted silently.
    /// Omitted in the spec means `false`.
    #[serde(default)]
    if_present: bool,
}
114
/// Options shared by every `*_path_matches` rule kind.
#[derive(Debug, Deserialize)]
struct MatchesOptions {
    /// `JSONPath` expression source; compiled with `JsonPath::parse`
    /// when the rule is built.
    path: String,
    /// Regex source; compiled with `Regex::new` when the rule is
    /// built.
    matches: String,
    /// Whether a query with zero matches is accepted silently.
    /// Omitted in the spec means `false`.
    #[serde(default)]
    if_present: bool,
}
123
124// ---------------------------------------------------------------
125// Rule
126// ---------------------------------------------------------------
127
/// The single rule implementation behind all six
/// `{json,yaml,toml}_path_{equals,matches}` kinds. The `format` and
/// `op` fields carry the two axes the kinds differ on.
#[derive(Debug)]
pub struct StructuredPathRule {
    /// Rule id, copied from the spec.
    id: String,
    /// Severity level, copied from the spec.
    level: Level,
    /// Optional policy link, copied from the spec.
    policy_url: Option<String>,
    /// Optional user-supplied message. When set it replaces the
    /// generated text for both "no match" and "match failed the op"
    /// violations (parse-failure violations keep their own text).
    message: Option<String>,
    /// Which files the rule applies to.
    scope: Scope,
    /// Parser used to read each file.
    format: Format,
    /// Compiled `JSONPath` query.
    path_expr: JsonPath,
    /// Original text of the query, kept for the "produced no match"
    /// message.
    path_src: String,
    /// Check applied to every value the query returns.
    op: Op,
    /// When `true`, a `JSONPath` query that produces zero matches
    /// is silently OK. When `false` (default), a zero-match query
    /// is reported as a single violation — the "value being
    /// enforced doesn't exist" case. Use `true` for predicates
    /// that are conditional on the field being present (e.g.
    /// "every `uses:` in a workflow must be SHA-pinned" — a
    /// workflow with no `uses:` at all shouldn't be flagged).
    if_present: bool,
}
148
149impl Rule for StructuredPathRule {
150    fn id(&self) -> &str {
151        &self.id
152    }
153    fn level(&self) -> Level {
154        self.level
155    }
156    fn policy_url(&self) -> Option<&str> {
157        self.policy_url.as_deref()
158    }
159
160    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
161        let mut violations = Vec::new();
162        for entry in ctx.index.files() {
163            if !self.scope.matches(&entry.path) {
164                continue;
165            }
166            let full = ctx.root.join(&entry.path);
167            let Ok(bytes) = std::fs::read(&full) else {
168                // permission / race — silent skip, like other
169                // content rules
170                continue;
171            };
172            violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
173        }
174        Ok(violations)
175    }
176
177    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
178        Some(self)
179    }
180}
181
182impl PerFileRule for StructuredPathRule {
183    fn path_scope(&self) -> &Scope {
184        &self.scope
185    }
186
187    fn evaluate_file(
188        &self,
189        _ctx: &Context<'_>,
190        path: &Path,
191        bytes: &[u8],
192    ) -> Result<Vec<Violation>> {
193        let Ok(text) = std::str::from_utf8(bytes) else {
194            return Ok(Vec::new());
195        };
196        let root_value = match self.format.parse(text) {
197            Ok(v) => v,
198            Err(err) => {
199                return Ok(vec![
200                    Violation::new(format!(
201                        "not a valid {} document: {err}",
202                        self.format.label()
203                    ))
204                    .with_path(std::sync::Arc::<Path>::from(path)),
205                ]);
206            }
207        };
208        let matches = self.path_expr.query(&root_value);
209        if matches.is_empty() {
210            if self.if_present {
211                return Ok(Vec::new());
212            }
213            let msg = self
214                .message
215                .clone()
216                .unwrap_or_else(|| format!("JSONPath `{}` produced no match", self.path_src));
217            return Ok(vec![
218                Violation::new(msg).with_path(std::sync::Arc::<Path>::from(path)),
219            ]);
220        }
221        let mut violations = Vec::new();
222        for m in matches.iter() {
223            if let Some(v) = check_match(m, &self.op) {
224                let base = self.message.clone().unwrap_or(v);
225                violations.push(Violation::new(base).with_path(std::sync::Arc::<Path>::from(path)));
226            }
227        }
228        Ok(violations)
229    }
230}
231
232/// Return `Some(message)` if the match fails the op; `None` if it passes.
233fn check_match(m: &Value, op: &Op) -> Option<String> {
234    match op {
235        Op::Equals(expected) => {
236            if m == expected {
237                None
238            } else {
239                Some(format!(
240                    "value at path does not equal expected: expected {}, got {}",
241                    short_render(expected),
242                    short_render(m),
243                ))
244            }
245        }
246        Op::Matches(re) => {
247            let Some(s) = m.as_str() else {
248                return Some(format!(
249                    "value at path is not a string (got {}), can't apply regex",
250                    kind_name(m)
251                ));
252            };
253            if re.is_match(s) {
254                None
255            } else {
256                Some(format!(
257                    "value at path {} does not match regex {}",
258                    short_render(m),
259                    re.as_str(),
260                ))
261            }
262        }
263    }
264}
265
266/// A stable, short rendering for error messages. Avoids
267/// dumping a whole object when the mismatch is on a sub-key.
268fn short_render(v: &Value) -> String {
269    let raw = v.to_string();
270    if raw.len() <= 80 {
271        raw
272    } else {
273        format!("{}…", &raw[..80])
274    }
275}
276
277fn kind_name(v: &Value) -> &'static str {
278    match v {
279        Value::Null => "null",
280        Value::Bool(_) => "bool",
281        Value::Number(_) => "number",
282        Value::String(_) => "string",
283        Value::Array(_) => "array",
284        Value::Object(_) => "object",
285    }
286}
287
288
289// ---------------------------------------------------------------
290// Builders
291//
// Six thin wrappers, one per (Format, Op) combination. Each
// reads the spec, validates the structured-query options, and
// constructs the shared `StructuredPathRule`.
295// ---------------------------------------------------------------
296
297pub fn json_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
298    build_equals(spec, Format::Json, "json_path_equals")
299}
300
301pub fn json_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
302    build_matches(spec, Format::Json, "json_path_matches")
303}
304
305pub fn yaml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
306    build_equals(spec, Format::Yaml, "yaml_path_equals")
307}
308
309pub fn yaml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
310    build_matches(spec, Format::Yaml, "yaml_path_matches")
311}
312
313pub fn toml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
314    build_equals(spec, Format::Toml, "toml_path_equals")
315}
316
317pub fn toml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
318    build_matches(spec, Format::Toml, "toml_path_matches")
319}
320
321fn build_equals(spec: &RuleSpec, format: Format, kind_label: &str) -> Result<Box<dyn Rule>> {
322    let paths = spec.paths.as_ref().ok_or_else(|| {
323        Error::rule_config(&spec.id, format!("{kind_label} requires a `paths` field"))
324    })?;
325    let opts: EqualsOptions = spec
326        .deserialize_options()
327        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
328    let path_expr = JsonPath::parse(&opts.path).map_err(|e| {
329        Error::rule_config(&spec.id, format!("invalid JSONPath {:?}: {e}", opts.path))
330    })?;
331    Ok(Box::new(StructuredPathRule {
332        id: spec.id.clone(),
333        level: spec.level,
334        policy_url: spec.policy_url.clone(),
335        message: spec.message.clone(),
336        scope: Scope::from_paths_spec(paths)?,
337        format,
338        path_expr,
339        path_src: opts.path,
340        op: Op::Equals(opts.equals),
341        if_present: opts.if_present,
342    }))
343}
344
345fn build_matches(spec: &RuleSpec, format: Format, kind_label: &str) -> Result<Box<dyn Rule>> {
346    let paths = spec.paths.as_ref().ok_or_else(|| {
347        Error::rule_config(&spec.id, format!("{kind_label} requires a `paths` field"))
348    })?;
349    let opts: MatchesOptions = spec
350        .deserialize_options()
351        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
352    let path_expr = JsonPath::parse(&opts.path).map_err(|e| {
353        Error::rule_config(&spec.id, format!("invalid JSONPath {:?}: {e}", opts.path))
354    })?;
355    let re = Regex::new(&opts.matches).map_err(|e| {
356        Error::rule_config(&spec.id, format!("invalid regex {:?}: {e}", opts.matches))
357    })?;
358    Ok(Box::new(StructuredPathRule {
359        id: spec.id.clone(),
360        level: spec.level,
361        policy_url: spec.policy_url.clone(),
362        message: spec.message.clone(),
363        scope: Scope::from_paths_spec(paths)?,
364        format,
365        path_expr,
366        path_src: opts.path,
367        op: Op::Matches(re),
368        if_present: opts.if_present,
369    }))
370}
371
/// Unit tests for the structured-path rule family: builder
/// validation first, then one or two evaluation cases per format,
/// then the parse-failure path.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, spec_yaml, tempdir_with_files};

    // ─── build-path errors ────────────────────────────────────

    #[test]
    fn build_rejects_missing_paths() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             path: \"$.name\"\n\
             equals: \"x\"\n\
             level: error\n",
        );
        assert!(json_path_equals_build(&spec).is_err());
    }

    #[test]
    fn build_rejects_invalid_jsonpath() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$..[invalid\"\n\
             equals: \"x\"\n\
             level: error\n",
        );
        assert!(json_path_equals_build(&spec).is_err());
    }

    #[test]
    fn build_rejects_invalid_regex_in_matches() {
        // The regex must sit under the real `matches` key: with any
        // other key the build fails on the missing field and the
        // regex is never compiled. Everything else in this spec is
        // valid, so the unterminated character class is the only
        // possible cause of the error.
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_matches\n\
             paths: \"package.json\"\n\
             path: \"$.version\"\n\
             matches: \"[unterminated\"\n\
             level: error\n",
        );
        assert!(json_path_matches_build(&spec).is_err());
    }

    #[test]
    fn build_rejects_matches_rule_without_matches_key() {
        // `pattern` is not an option this rule kind knows; the
        // required `matches` field is absent, so the build fails.
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_matches\n\
             paths: \"package.json\"\n\
             path: \"$.version\"\n\
             pattern: \"[unterminated\"\n\
             level: error\n",
        );
        assert!(json_path_matches_build(&spec).is_err());
    }

    // ─── json_path_equals ─────────────────────────────────────

    #[test]
    fn json_path_equals_passes_when_value_matches() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$.name\"\n\
             equals: \"demo\"\n\
             level: error\n",
        );
        let rule = json_path_equals_build(&spec).unwrap();
        let (tmp, idx) =
            tempdir_with_files(&[("package.json", br#"{"name":"demo","version":"1.0.0"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "matching value should pass: {v:?}");
    }

    #[test]
    fn json_path_equals_fires_on_mismatch() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$.name\"\n\
             equals: \"demo\"\n\
             level: error\n",
        );
        let rule = json_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"name":"other"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
    }

    #[test]
    fn json_path_equals_fires_on_missing_path() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$.name\"\n\
             equals: \"demo\"\n\
             level: error\n",
        );
        let rule = json_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"1.0"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1, "missing path should fire");
    }

    #[test]
    fn json_path_if_present_silent_on_missing() {
        // `if_present: true` → missing path is silent.
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$.name\"\n\
             equals: \"demo\"\n\
             if_present: true\n\
             level: error\n",
        );
        let rule = json_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"1.0"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "if_present should silence: {v:?}");
    }

    // ─── json_path_matches ────────────────────────────────────

    #[test]
    fn json_path_matches_passes_on_pattern_hit() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_matches\n\
             paths: \"package.json\"\n\
             path: \"$.version\"\n\
             matches: \"^\\\\d+\\\\.\\\\d+\\\\.\\\\d+$\"\n\
             level: error\n",
        );
        let rule = json_path_matches_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"1.2.3"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "matching version should pass: {v:?}");
    }

    #[test]
    fn json_path_matches_fires_on_pattern_miss() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_matches\n\
             paths: \"package.json\"\n\
             path: \"$.version\"\n\
             matches: \"^\\\\d+\\\\.\\\\d+\\\\.\\\\d+$\"\n\
             level: error\n",
        );
        let rule = json_path_matches_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"v1.x"}"#)]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
    }

    // ─── yaml_path_* ─────────────────────────────────────────

    #[test]
    fn yaml_path_equals_passes_when_value_matches() {
        let spec = spec_yaml(
            "id: t\n\
             kind: yaml_path_equals\n\
             paths: \".github/workflows/*.yml\"\n\
             path: \"$.name\"\n\
             equals: \"CI\"\n\
             level: error\n",
        );
        let rule = yaml_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[(
            ".github/workflows/ci.yml",
            b"name: CI\non: push\njobs: {}\n",
        )]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "matching name should pass: {v:?}");
    }

    #[test]
    fn yaml_path_matches_uses_bracket_notation_for_dashed_keys() {
        // Dashed YAML keys need bracket notation
        // (`$.foo['dashed-key']`) because the JSONPath dot-form
        // can't parse them. The key used here (`using`) has no dash,
        // so this case checks that the bracket form itself resolves
        // against a YAML document.
        let spec = spec_yaml(
            "id: t\n\
             kind: yaml_path_matches\n\
             paths: \"action.yml\"\n\
             path: \"$.runs['using']\"\n\
             matches: \"^node\\\\d+$\"\n\
             level: error\n",
        );
        let rule = yaml_path_matches_build(&spec).unwrap();
        let (tmp, idx) =
            tempdir_with_files(&[("action.yml", b"runs:\n  using: node20\n  main: index.js\n")]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "bracket notation should match: {v:?}");
    }

    // ─── toml_path_* ─────────────────────────────────────────

    #[test]
    fn toml_path_equals_passes_when_value_matches() {
        let spec = spec_yaml(
            "id: t\n\
             kind: toml_path_equals\n\
             paths: \"Cargo.toml\"\n\
             path: \"$.package.edition\"\n\
             equals: \"2024\"\n\
             level: error\n",
        );
        let rule = toml_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[(
            "Cargo.toml",
            b"[package]\nname = \"x\"\nedition = \"2024\"\n",
        )]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "matching edition should pass: {v:?}");
    }

    #[test]
    fn toml_path_matches_fires_on_floating_version() {
        // Common policy: deps must be tilde-pinned, not bare.
        let spec = spec_yaml(
            "id: t\n\
             kind: toml_path_matches\n\
             paths: \"Cargo.toml\"\n\
             path: \"$.dependencies.serde\"\n\
             matches: \"^[~=]\"\n\
             level: error\n",
        );
        let rule = toml_path_matches_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[(
            "Cargo.toml",
            b"[package]\nname = \"x\"\n[dependencies]\nserde = \"1\"\n",
        )]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1, "floating `serde = \"1\"` should fire");
    }

    // ─── parse error path ─────────────────────────────────────

    #[test]
    fn evaluate_fires_on_malformed_input() {
        let spec = spec_yaml(
            "id: t\n\
             kind: json_path_equals\n\
             paths: \"package.json\"\n\
             path: \"$.name\"\n\
             equals: \"x\"\n\
             level: error\n",
        );
        let rule = json_path_equals_build(&spec).unwrap();
        let (tmp, idx) = tempdir_with_files(&[("package.json", b"{not valid json")]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1, "malformed JSON should fire one violation");
    }
}