Skip to main content

alint_rules/
structured_path.rs

1//! Structured-query rule family: `{json,yaml,toml}_path_{equals,matches}`.
2//!
3//! Six rule kinds share a single implementation that varies
4//! along two axes:
5//!
6//! - **Format** — `Json`, `Yaml`, or `Toml`. The file is parsed
7//!   into a `serde_json::Value` tree regardless (YAML and TOML
8//!   values coerce through serde), so the `JSONPath` engine only
9//!   has to reason about one tree shape.
10//! - **Op** — `Equals(value)` for exact equality or
11//!   `Matches(regex)` for regex on string values.
12//!
13//! All rule kinds require:
14//!
15//! - `paths` — which files to scan.
16//! - `path` — a `JSONPath` expression (RFC 9535) pointing at the
17//!   values to check.
18//! - Either `equals` (arbitrary YAML value) or `matches`
19//!   (regex string), according to the rule kind.
20//!
21//! ## Semantics
22//!
//! `JSONPath` can return multiple matches (`$.deps[*].version`).
//! Every match must satisfy the op; each mismatching match
//! produces its own violation, reported against the file that
//! contains it. If the query returns zero matches, that's one
//! "path not found" violation — the option the user is
//! enforcing doesn't exist.
28//!
//! Unparseable files (bad JSON / YAML / TOML) produce one
//! violation per file. A file that fails to parse is strictly a
//! separate problem, not the structured rule's concern — but
//! better to surface it than silently skip.
33
34use std::io::Read;
35
36use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
37use regex::Regex;
38use serde::Deserialize;
39use serde_json::Value;
40use serde_json_path::JsonPath;
41
/// Which parser to use on the target file.
///
/// Whatever the variant, `Format::parse` yields a
/// `serde_json::Value`, so the rest of the rule only deals with
/// one tree shape.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    /// Parse the file with `serde_json`.
    Json,
    /// Parse the file with `serde_yaml_ng`.
    Yaml,
    /// Parse the file with `toml`.
    Toml,
}
49
50impl Format {
51    fn parse(self, text: &str) -> std::result::Result<Value, String> {
52        match self {
53            Self::Json => serde_json::from_str(text).map_err(|e| e.to_string()),
54            Self::Yaml => serde_yaml_ng::from_str(text).map_err(|e| e.to_string()),
55            Self::Toml => toml::from_str(text).map_err(|e| e.to_string()),
56        }
57    }
58
59    fn label(self) -> &'static str {
60        match self {
61            Self::Json => "JSON",
62            Self::Yaml => "YAML",
63            Self::Toml => "TOML",
64        }
65    }
66}
67
/// Comparison applied to every value the `JSONPath` query
/// selects. Sharing one enum across the rule kinds keeps the
/// rule builders thin.
#[derive(Debug)]
pub enum Op {
    /// Value at `path` must compare equal (`serde_json::Value`
    /// `==`) to this literal. Any JSON-representable value works
    /// (bool, number, string, array, object, null).
    Equals(Value),
    /// Value at `path` must be a string that the regex matches.
    /// A non-string match produces a violation whose message
    /// names the kind that was found instead (`null`, `bool`,
    /// `number`, `array`, or `object`).
    Matches(Regex),
}
80
81// ---------------------------------------------------------------
82// Options — deserialized from the rule spec's `extra` map.
83// ---------------------------------------------------------------
84
/// Options shared by every `*_path_equals` rule kind.
#[derive(Debug, Deserialize)]
struct EqualsOptions {
    /// `JSONPath` expression; compiled with `JsonPath::parse`
    /// when the rule is built.
    path: String,
    /// Literal every selected value must equal; becomes the
    /// payload of `Op::Equals`.
    equals: Value,
}
91
/// Options shared by every `*_path_matches` rule kind.
#[derive(Debug, Deserialize)]
struct MatchesOptions {
    /// `JSONPath` expression; compiled with `JsonPath::parse`
    /// when the rule is built.
    path: String,
    /// Regex source; compiled with `Regex::new` when the rule is
    /// built and stored in `Op::Matches`.
    matches: String,
}
98
99// ---------------------------------------------------------------
100// Rule
101// ---------------------------------------------------------------
102
/// The single rule implementation behind all six
/// `{json,yaml,toml}_path_{equals,matches}` kinds; `format` and
/// `op` select the concrete behaviour.
#[derive(Debug)]
pub struct StructuredPathRule {
    /// Rule identifier, returned by `Rule::id`.
    id: String,
    /// Level returned by `Rule::level`.
    level: Level,
    /// Optional policy link, returned by `Rule::policy_url`.
    policy_url: Option<String>,
    /// Optional custom violation text. When set it replaces the
    /// generated "no match" and mismatch messages (but not the
    /// "not a valid … document" message).
    message: Option<String>,
    /// Selects which indexed files this rule evaluates.
    scope: Scope,
    /// Parser used to turn each file into a `Value` tree.
    format: Format,
    /// Compiled `JSONPath` query run against each parsed file.
    path_expr: JsonPath,
    /// Source text of `path_expr`, kept for the "produced no
    /// match" message.
    path_src: String,
    /// Check applied to every value the query selects.
    op: Op,
}
115
116impl Rule for StructuredPathRule {
117    fn id(&self) -> &str {
118        &self.id
119    }
120    fn level(&self) -> Level {
121        self.level
122    }
123    fn policy_url(&self) -> Option<&str> {
124        self.policy_url.as_deref()
125    }
126
127    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
128        let mut violations = Vec::new();
129        for entry in ctx.index.files() {
130            if !self.scope.matches(&entry.path) {
131                continue;
132            }
133            let full = ctx.root.join(&entry.path);
134            let Ok(text) = read_to_string(&full) else {
135                // permission / race — silent skip, like other
136                // content rules
137                continue;
138            };
139            let root_value = match self.format.parse(&text) {
140                Ok(v) => v,
141                Err(err) => {
142                    violations.push(
143                        Violation::new(format!(
144                            "not a valid {} document: {err}",
145                            self.format.label()
146                        ))
147                        .with_path(&entry.path),
148                    );
149                    continue;
150                }
151            };
152            let matches = self.path_expr.query(&root_value);
153            if matches.is_empty() {
154                let msg = self
155                    .message
156                    .clone()
157                    .unwrap_or_else(|| format!("JSONPath `{}` produced no match", self.path_src));
158                violations.push(Violation::new(msg).with_path(&entry.path));
159                continue;
160            }
161            for m in matches.iter() {
162                if let Some(v) = check_match(m, &self.op) {
163                    let base = self.message.clone().unwrap_or(v);
164                    violations.push(Violation::new(base).with_path(&entry.path));
165                }
166            }
167        }
168        Ok(violations)
169    }
170}
171
172/// Return `Some(message)` if the match fails the op; `None` if it passes.
173fn check_match(m: &Value, op: &Op) -> Option<String> {
174    match op {
175        Op::Equals(expected) => {
176            if m == expected {
177                None
178            } else {
179                Some(format!(
180                    "value at path does not equal expected: expected {}, got {}",
181                    short_render(expected),
182                    short_render(m),
183                ))
184            }
185        }
186        Op::Matches(re) => {
187            let Some(s) = m.as_str() else {
188                return Some(format!(
189                    "value at path is not a string (got {}), can't apply regex",
190                    kind_name(m)
191                ));
192            };
193            if re.is_match(s) {
194                None
195            } else {
196                Some(format!(
197                    "value at path {} does not match regex {}",
198                    short_render(m),
199                    re.as_str(),
200                ))
201            }
202        }
203    }
204}
205
206/// A stable, short rendering for error messages. Avoids
207/// dumping a whole object when the mismatch is on a sub-key.
208fn short_render(v: &Value) -> String {
209    let raw = v.to_string();
210    if raw.len() <= 80 {
211        raw
212    } else {
213        format!("{}…", &raw[..80])
214    }
215}
216
217fn kind_name(v: &Value) -> &'static str {
218    match v {
219        Value::Null => "null",
220        Value::Bool(_) => "bool",
221        Value::Number(_) => "number",
222        Value::String(_) => "string",
223        Value::Array(_) => "array",
224        Value::Object(_) => "object",
225    }
226}
227
/// Read the whole file at `path` into a `String`.
///
/// Returns the underlying I/O error if the file cannot be opened
/// or read (including contents that are not valid UTF-8).
fn read_to_string(path: &std::path::Path) -> std::io::Result<String> {
    let mut contents = String::new();
    std::fs::File::open(path)?.read_to_string(&mut contents)?;
    Ok(contents)
}
234
235// ---------------------------------------------------------------
236// Builders
237//
// Six thin wrappers, one per (Format, Op) combination. Each
// reads the spec, validates the structured-query options, and
// constructs the shared `StructuredPathRule`.
241// ---------------------------------------------------------------
242
243pub fn json_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
244    build_equals(spec, Format::Json, "json_path_equals")
245}
246
247pub fn json_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
248    build_matches(spec, Format::Json, "json_path_matches")
249}
250
251pub fn yaml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
252    build_equals(spec, Format::Yaml, "yaml_path_equals")
253}
254
255pub fn yaml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
256    build_matches(spec, Format::Yaml, "yaml_path_matches")
257}
258
259pub fn toml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
260    build_equals(spec, Format::Toml, "toml_path_equals")
261}
262
263pub fn toml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
264    build_matches(spec, Format::Toml, "toml_path_matches")
265}
266
267fn build_equals(spec: &RuleSpec, format: Format, kind_label: &str) -> Result<Box<dyn Rule>> {
268    let paths = spec.paths.as_ref().ok_or_else(|| {
269        Error::rule_config(&spec.id, format!("{kind_label} requires a `paths` field"))
270    })?;
271    let opts: EqualsOptions = spec
272        .deserialize_options()
273        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
274    let path_expr = JsonPath::parse(&opts.path).map_err(|e| {
275        Error::rule_config(&spec.id, format!("invalid JSONPath {:?}: {e}", opts.path))
276    })?;
277    Ok(Box::new(StructuredPathRule {
278        id: spec.id.clone(),
279        level: spec.level,
280        policy_url: spec.policy_url.clone(),
281        message: spec.message.clone(),
282        scope: Scope::from_paths_spec(paths)?,
283        format,
284        path_expr,
285        path_src: opts.path,
286        op: Op::Equals(opts.equals),
287    }))
288}
289
290fn build_matches(spec: &RuleSpec, format: Format, kind_label: &str) -> Result<Box<dyn Rule>> {
291    let paths = spec.paths.as_ref().ok_or_else(|| {
292        Error::rule_config(&spec.id, format!("{kind_label} requires a `paths` field"))
293    })?;
294    let opts: MatchesOptions = spec
295        .deserialize_options()
296        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
297    let path_expr = JsonPath::parse(&opts.path).map_err(|e| {
298        Error::rule_config(&spec.id, format!("invalid JSONPath {:?}: {e}", opts.path))
299    })?;
300    let re = Regex::new(&opts.matches).map_err(|e| {
301        Error::rule_config(&spec.id, format!("invalid regex {:?}: {e}", opts.matches))
302    })?;
303    Ok(Box::new(StructuredPathRule {
304        id: spec.id.clone(),
305        level: spec.level,
306        policy_url: spec.policy_url.clone(),
307        message: spec.message.clone(),
308        scope: Scope::from_paths_spec(paths)?,
309        format,
310        path_expr,
311        path_src: opts.path,
312        op: Op::Matches(re),
313    }))
314}