Skip to main content

alint_rules/
structured_path.rs

1//! Structured-query rule family: `{json,yaml,toml}_path_{equals,matches}`.
2//!
3//! Six rule kinds share a single implementation that varies
4//! along two axes:
5//!
6//! - **Format** — `Json`, `Yaml`, or `Toml`. The file is parsed
7//!   into a `serde_json::Value` tree regardless (YAML and TOML
8//!   values coerce through serde), so the `JSONPath` engine only
9//!   has to reason about one tree shape.
10//! - **Op** — `Equals(value)` for exact equality or
11//!   `Matches(regex)` for regex on string values.
12//!
13//! All rule kinds require:
14//!
15//! - `paths` — which files to scan.
16//! - `path` — a `JSONPath` expression (RFC 9535) pointing at the
17//!   values to check.
18//! - Either `equals` (arbitrary YAML value) or `matches`
19//!   (regex string), according to the rule kind.
20//!
21//! ## Semantics
22//!
23//! `JSONPath` can return multiple matches (`$.deps[*].version`).
24//! Every match must satisfy the op; any single mismatch
25//! produces a violation at that match's location. If the query
26//! returns zero matches, that's one "path not found" violation
27//! — the option the user is enforcing doesn't exist.
28//!
29//! The optional **`if_present: true`** flag flips the zero-match
30//! case: under it, zero matches are silently OK, and only
31//! actual matches that fail the op produce violations. Useful
32//! for predicates that only apply when a field is present —
33//! e.g. "every `uses:` in a GitHub Actions workflow must be
34//! pinned to a commit SHA" (a workflow with only `run:` steps
35//! has no `uses:` at all and shouldn't be flagged).
36//!
//! Unparseable files (bad JSON / YAML / TOML) produce one
//! violation per file. An unparseable file is a different
//! problem from the one the structured rule checks for — but
//! better to surface it than silently skip.
41
42use std::io::Read;
43
44use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
45use regex::Regex;
46use serde::Deserialize;
47use serde_json::Value;
48use serde_json_path::JsonPath;
49
/// Serialization format of the target file. Selects which parser
/// `Format::parse` runs to turn the file text into a
/// `serde_json::Value` tree before the `JSONPath` query is applied.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// Parse the file with `serde_json`.
    Json,
    /// Parse the file with `serde_yaml_ng`.
    Yaml,
    /// Parse the file with `toml`.
    Toml,
}
57
58impl Format {
59    fn parse(self, text: &str) -> std::result::Result<Value, String> {
60        match self {
61            Self::Json => serde_json::from_str(text).map_err(|e| e.to_string()),
62            Self::Yaml => serde_yaml_ng::from_str(text).map_err(|e| e.to_string()),
63            Self::Toml => toml::from_str(text).map_err(|e| e.to_string()),
64        }
65    }
66
67    fn label(self) -> &'static str {
68        match self {
69            Self::Json => "JSON",
70            Self::Yaml => "YAML",
71            Self::Toml => "TOML",
72        }
73    }
74}
75
/// Comparison op applied to each value the `JSONPath` query
/// selects — keeps the rule builders thin.
#[derive(Debug)]
pub enum Op {
    /// Value at `path` must compare equal (`==` on
    /// `serde_json::Value`) to this literal. Any
    /// JSON-representable value works (bool, number, string,
    /// array, object, null).
    Equals(Value),
    /// Value at `path` must be a string that the regex matches
    /// (tested with `Regex::is_match`, so the pattern is only
    /// anchored if it contains its own anchors). A non-string
    /// match produces a violation with a clear
    /// "value at path is not a string (got <kind>)" message.
    Matches(Regex),
}
88
89// ---------------------------------------------------------------
90// Options — deserialized from the rule spec's `extra` map.
91// ---------------------------------------------------------------
92
/// Options shared by every `*_path_equals` rule kind.
/// Deserialized from the rule spec by `build_equals`.
#[derive(Debug, Deserialize)]
struct EqualsOptions {
    /// `JSONPath` expression source; parsed with `JsonPath::parse`
    /// when the rule is built.
    path: String,
    /// Literal every matched value must equal.
    equals: Value,
    /// When `true`, a query with zero matches is not a violation.
    /// Defaults to `false` when omitted.
    #[serde(default)]
    if_present: bool,
}
101
/// Options shared by every `*_path_matches` rule kind.
/// Deserialized from the rule spec by `build_matches`.
#[derive(Debug, Deserialize)]
struct MatchesOptions {
    /// `JSONPath` expression source; parsed with `JsonPath::parse`
    /// when the rule is built.
    path: String,
    /// Regex source; compiled with `Regex::new` when the rule is
    /// built.
    matches: String,
    /// When `true`, a query with zero matches is not a violation.
    /// Defaults to `false` when omitted.
    #[serde(default)]
    if_present: bool,
}
110
111// ---------------------------------------------------------------
112// Rule
113// ---------------------------------------------------------------
114
/// The single rule implementation behind all six
/// `{json,yaml,toml}_path_{equals,matches}` rule kinds.
#[derive(Debug)]
pub struct StructuredPathRule {
    /// Rule identifier, copied from the spec.
    id: String,
    /// Severity level, copied from the spec.
    level: Level,
    /// Optional policy URL, copied from the spec.
    policy_url: Option<String>,
    /// Optional custom message. When set it replaces the generated
    /// text of "no match" and "match failed the op" violations;
    /// parse-failure violations always use the generated text.
    message: Option<String>,
    /// Selects which indexed files this rule inspects.
    scope: Scope,
    /// Parser used to load each file.
    format: Format,
    /// Compiled `JSONPath` query.
    path_expr: JsonPath,
    /// Original query text, kept for the "produced no match" message.
    path_src: String,
    /// Check applied to every matched value.
    op: Op,
    /// When `true`, a `JSONPath` query that produces zero matches
    /// is silently OK. When `false` (default), a zero-match query
    /// is reported as a single violation — the "value being
    /// enforced doesn't exist" case. Use `true` for predicates
    /// that are conditional on the field being present (e.g.
    /// "every `uses:` in a workflow must be SHA-pinned" — a
    /// workflow with no `uses:` at all shouldn't be flagged).
    if_present: bool,
}
135
136impl Rule for StructuredPathRule {
137    fn id(&self) -> &str {
138        &self.id
139    }
140    fn level(&self) -> Level {
141        self.level
142    }
143    fn policy_url(&self) -> Option<&str> {
144        self.policy_url.as_deref()
145    }
146
147    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
148        let mut violations = Vec::new();
149        for entry in ctx.index.files() {
150            if !self.scope.matches(&entry.path) {
151                continue;
152            }
153            let full = ctx.root.join(&entry.path);
154            let Ok(text) = read_to_string(&full) else {
155                // permission / race — silent skip, like other
156                // content rules
157                continue;
158            };
159            let root_value = match self.format.parse(&text) {
160                Ok(v) => v,
161                Err(err) => {
162                    violations.push(
163                        Violation::new(format!(
164                            "not a valid {} document: {err}",
165                            self.format.label()
166                        ))
167                        .with_path(&entry.path),
168                    );
169                    continue;
170                }
171            };
172            let matches = self.path_expr.query(&root_value);
173            if matches.is_empty() {
174                if self.if_present {
175                    continue;
176                }
177                let msg = self
178                    .message
179                    .clone()
180                    .unwrap_or_else(|| format!("JSONPath `{}` produced no match", self.path_src));
181                violations.push(Violation::new(msg).with_path(&entry.path));
182                continue;
183            }
184            for m in matches.iter() {
185                if let Some(v) = check_match(m, &self.op) {
186                    let base = self.message.clone().unwrap_or(v);
187                    violations.push(Violation::new(base).with_path(&entry.path));
188                }
189            }
190        }
191        Ok(violations)
192    }
193}
194
195/// Return `Some(message)` if the match fails the op; `None` if it passes.
196fn check_match(m: &Value, op: &Op) -> Option<String> {
197    match op {
198        Op::Equals(expected) => {
199            if m == expected {
200                None
201            } else {
202                Some(format!(
203                    "value at path does not equal expected: expected {}, got {}",
204                    short_render(expected),
205                    short_render(m),
206                ))
207            }
208        }
209        Op::Matches(re) => {
210            let Some(s) = m.as_str() else {
211                return Some(format!(
212                    "value at path is not a string (got {}), can't apply regex",
213                    kind_name(m)
214                ));
215            };
216            if re.is_match(s) {
217                None
218            } else {
219                Some(format!(
220                    "value at path {} does not match regex {}",
221                    short_render(m),
222                    re.as_str(),
223                ))
224            }
225        }
226    }
227}
228
229/// A stable, short rendering for error messages. Avoids
230/// dumping a whole object when the mismatch is on a sub-key.
231fn short_render(v: &Value) -> String {
232    let raw = v.to_string();
233    if raw.len() <= 80 {
234        raw
235    } else {
236        format!("{}…", &raw[..80])
237    }
238}
239
240fn kind_name(v: &Value) -> &'static str {
241    match v {
242        Value::Null => "null",
243        Value::Bool(_) => "bool",
244        Value::Number(_) => "number",
245        Value::String(_) => "string",
246        Value::Array(_) => "array",
247        Value::Object(_) => "object",
248    }
249}
250
/// Opens the file at `path` and returns its entire contents as a
/// `String`.
///
/// Any error from opening or reading the file (including content
/// that is not valid UTF-8) is returned unchanged.
fn read_to_string(path: &std::path::Path) -> std::io::Result<String> {
    let mut contents = String::new();
    std::fs::File::open(path)?.read_to_string(&mut contents)?;
    Ok(contents)
}
257
258// ---------------------------------------------------------------
259// Builders
260//
// Six thin wrappers, one per (Format, Op) combination. Each
// forwards the spec to the shared builder for its op, which
// validates the structured-query options and constructs the
// shared `StructuredPathRule`.
264// ---------------------------------------------------------------
265
266pub fn json_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
267    build_equals(spec, Format::Json, "json_path_equals")
268}
269
270pub fn json_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
271    build_matches(spec, Format::Json, "json_path_matches")
272}
273
274pub fn yaml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
275    build_equals(spec, Format::Yaml, "yaml_path_equals")
276}
277
278pub fn yaml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
279    build_matches(spec, Format::Yaml, "yaml_path_matches")
280}
281
282pub fn toml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
283    build_equals(spec, Format::Toml, "toml_path_equals")
284}
285
286pub fn toml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
287    build_matches(spec, Format::Toml, "toml_path_matches")
288}
289
290fn build_equals(spec: &RuleSpec, format: Format, kind_label: &str) -> Result<Box<dyn Rule>> {
291    let paths = spec.paths.as_ref().ok_or_else(|| {
292        Error::rule_config(&spec.id, format!("{kind_label} requires a `paths` field"))
293    })?;
294    let opts: EqualsOptions = spec
295        .deserialize_options()
296        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
297    let path_expr = JsonPath::parse(&opts.path).map_err(|e| {
298        Error::rule_config(&spec.id, format!("invalid JSONPath {:?}: {e}", opts.path))
299    })?;
300    Ok(Box::new(StructuredPathRule {
301        id: spec.id.clone(),
302        level: spec.level,
303        policy_url: spec.policy_url.clone(),
304        message: spec.message.clone(),
305        scope: Scope::from_paths_spec(paths)?,
306        format,
307        path_expr,
308        path_src: opts.path,
309        op: Op::Equals(opts.equals),
310        if_present: opts.if_present,
311    }))
312}
313
314fn build_matches(spec: &RuleSpec, format: Format, kind_label: &str) -> Result<Box<dyn Rule>> {
315    let paths = spec.paths.as_ref().ok_or_else(|| {
316        Error::rule_config(&spec.id, format!("{kind_label} requires a `paths` field"))
317    })?;
318    let opts: MatchesOptions = spec
319        .deserialize_options()
320        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
321    let path_expr = JsonPath::parse(&opts.path).map_err(|e| {
322        Error::rule_config(&spec.id, format!("invalid JSONPath {:?}: {e}", opts.path))
323    })?;
324    let re = Regex::new(&opts.matches).map_err(|e| {
325        Error::rule_config(&spec.id, format!("invalid regex {:?}: {e}", opts.matches))
326    })?;
327    Ok(Box::new(StructuredPathRule {
328        id: spec.id.clone(),
329        level: spec.level,
330        policy_url: spec.policy_url.clone(),
331        message: spec.message.clone(),
332        scope: Scope::from_paths_spec(paths)?,
333        format,
334        path_expr,
335        path_src: opts.path,
336        op: Op::Matches(re),
337        if_present: opts.if_present,
338    }))
339}