Skip to main content

alint_rules/
structured_path.rs

1//! Structured-query rule family:
2//! `{json,yaml,toml,xml}_path_{equals,matches}`.
3//!
4//! Eight rule kinds share a single implementation that varies
5//! along two axes:
6//!
7//! - **Format** — `Json`, `Yaml`, `Toml`, or `Xml`. The file is
8//!   parsed into a `serde_json::Value` tree regardless (YAML and
9//!   TOML coerce through serde; XML maps via the xmltodict-style
10//!   convention in `xml_to_value` — `@attr` / `#text` /
11//!   repeated-element→array, leaf elements collapse to their
12//!   text string, namespaces flatten to local names, every leaf
13//!   is a string), so the `JSONPath` engine only has to reason
14//!   about one tree shape. XML design + open-question
15//!   resolutions: `docs/design/v0.10/xml_path.md`.
16//! - **Op** — `Equals(value)` for exact equality or
17//!   `Matches(regex)` for regex on string values.
18//!
19//! All rule kinds require:
20//!
21//! - `paths` — which files to scan.
22//! - `path` — a `JSONPath` expression (RFC 9535) pointing at the
23//!   values to check.
24//! - Either `equals` (arbitrary YAML value) or `matches`
25//!   (regex string), according to the rule kind.
26//!
27//! ## Semantics
28//!
29//! `JSONPath` can return multiple matches (`$.deps[*].version`).
30//! Every match must satisfy the op; any single mismatch
31//! produces a violation at that match's location. If the query
32//! returns zero matches, that's one "path not found" violation
33//! — the option the user is enforcing doesn't exist.
34//!
35//! The optional **`if_present: true`** flag flips the zero-match
36//! case: under it, zero matches are silently OK, and only
37//! actual matches that fail the op produce violations. Useful
38//! for predicates that only apply when a field is present —
39//! e.g. "every `uses:` in a GitHub Actions workflow must be
40//! pinned to a commit SHA" (a workflow with only `run:` steps
41//! has no `uses:` at all and shouldn't be flagged).
42//!
43//! Unparseable files (bad JSON / YAML / TOML, not-well-formed
44//! XML) produce one violation per file. An unparseable file is a
45//! documentation problem, not the structured rule's concern —
46//! but better to surface it than silently skip.
47
48use std::path::{Path, PathBuf};
49
50use alint_core::{
51    Context, Error, Level, PathsSpec, PerFileRule, Result, Rule, RuleSpec, Scope, Violation,
52};
53use regex::Regex;
54use serde::Deserialize;
55use serde_json::Value;
56use serde_json_path::JsonPath;
57
/// Reports whether `pattern` names exactly one relative path.
///
/// A pattern is literal when it is not a `!`-prefixed exclude and
/// contains none of the glob metacharacters `*`, `?`, `[`, `]`,
/// `{`, `}`. Same check as `file_exists::is_literal_path`; it is
/// duplicated here rather than promoted to a crate-wide helper
/// module that only two rules would use.
fn is_literal_path(pattern: &str) -> bool {
    const GLOB_META: [char; 6] = ['*', '?', '[', ']', '{', '}'];

    if pattern.starts_with('!') {
        return false;
    }
    !pattern.contains(&GLOB_META[..])
}
68
69/// Collect every literal pattern from `spec` IFF every entry is
70/// a literal AND the spec carries no excludes. Returns `None`
71/// when any pattern is a glob or there are excludes — the slow
72/// path is still correct in those cases.
73fn extract_literal_paths(spec: &PathsSpec) -> Option<Vec<PathBuf>> {
74    let patterns: Vec<&str> = match spec {
75        PathsSpec::Single(s) => vec![s.as_str()],
76        PathsSpec::Many(v) => v.iter().map(String::as_str).collect(),
77        PathsSpec::IncludeExclude { include, exclude } if exclude.is_empty() => {
78            include.iter().map(String::as_str).collect()
79        }
80        PathsSpec::IncludeExclude { .. } => return None,
81    };
82    if patterns.iter().all(|p| is_literal_path(p)) {
83        Some(patterns.iter().map(PathBuf::from).collect())
84    } else {
85        None
86    }
87}
88
/// Which structured-document format — and therefore which
/// parser — to apply to the target file. Every variant is parsed
/// into the same `serde_json::Value` tree by `Format::parse`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Format {
    Json,
    Yaml,
    Toml,
    Xml,
}
97
98impl Format {
99    pub(crate) fn parse(self, text: &str) -> std::result::Result<Value, String> {
100        match self {
101            Self::Json => serde_json::from_str(text).map_err(|e| e.to_string()),
102            Self::Yaml => serde_yaml_ng::from_str(text).map_err(|e| e.to_string()),
103            Self::Toml => toml::from_str(text).map_err(|e| e.to_string()),
104            Self::Xml => xml_to_value(text),
105        }
106    }
107
108    pub(crate) fn label(self) -> &'static str {
109        match self {
110            Self::Json => "JSON",
111            Self::Yaml => "YAML",
112            Self::Toml => "TOML",
113            Self::Xml => "XML",
114        }
115    }
116
117    /// Detect the format from a path's extension. Returns `None`
118    /// for unknown extensions; callers decide how to fall back
119    /// (require an explicit `format:` override, default to JSON,
120    /// emit a per-file violation, etc).
121    pub(crate) fn detect_from_path(path: &std::path::Path) -> Option<Self> {
122        match path.extension()?.to_str()? {
123            "json" => Some(Self::Json),
124            "yaml" | "yml" => Some(Self::Yaml),
125            "toml" => Some(Self::Toml),
126            "xml" | "csproj" | "props" | "targets" | "vbproj" | "fsproj" | "nuspec" => {
127                Some(Self::Xml)
128            }
129            _ => None,
130        }
131    }
132}
133
/// Comparison op — keeps the rule builders thin.
#[derive(Debug)]
pub enum Op {
    /// Value at `path` must compare equal to this literal using
    /// `serde_json::Value` equality. Any JSON-representable
    /// value works (bool, number, string, array, object, null).
    Equals(Value),
    /// Value at `path` must be a string that the regex matches
    /// (tested with `Regex::is_match`). A non-string match
    /// produces a violation reading `value at path is not a
    /// string (got <kind>), can't apply regex`.
    Matches(Regex),
}
146
147// ---------------------------------------------------------------
148// Options — deserialized from the rule spec's `extra` map.
149// ---------------------------------------------------------------
150
/// Options shared by every `*_path_equals` rule kind.
///
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct EqualsOptions {
    // `JSONPath` expression source; parsed with `JsonPath::parse`
    // when the rule is built.
    path: String,
    // Expected value; becomes `Op::Equals`.
    equals: Value,
    // Zero-match queries are silent when `true`. Defaults to
    // `false` when the key is omitted.
    #[serde(default)]
    if_present: bool,
}
160
/// Options shared by every `*_path_matches` rule kind.
///
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct MatchesOptions {
    // `JSONPath` expression source; parsed with `JsonPath::parse`
    // when the rule is built.
    path: String,
    // Regex source; compiled with `Regex::new` when the rule is
    // built and stored as `Op::Matches`.
    matches: String,
    // Zero-match queries are silent when `true`. Defaults to
    // `false` when the key is omitted.
    #[serde(default)]
    if_present: bool,
}
170
171// ---------------------------------------------------------------
172// Rule
173// ---------------------------------------------------------------
174
/// The single rule implementation behind all eight
/// `{json,yaml,toml,xml}_path_{equals,matches}` kinds; the
/// `format` and `op` fields select the behaviour.
#[derive(Debug)]
pub struct StructuredPathRule {
    /// Rule id, copied from the spec.
    id: String,
    /// Severity, copied from the spec.
    level: Level,
    /// Optional policy link, copied from the spec.
    policy_url: Option<String>,
    /// Optional user-supplied message. When set it replaces the
    /// generated text of zero-match and op-failure violations
    /// (parse-error violations keep their generated text).
    message: Option<String>,
    /// File scope built from the spec via `Scope::from_spec`.
    scope: Scope,
    /// `Some(paths)` when every `paths:` entry is a plain
    /// literal (no glob metacharacters, no `!` excludes). The
    /// fast path uses these to short-circuit through the
    /// index's hash-set and skip the O(N) `scope.matches`
    /// scan — same shape as `file_exists`'s fast path. Driven
    /// by the bundled `monorepo/cargo-workspace@v1`'s
    /// `cargo-workspace-member-declares-name` rule, which
    /// `for_each_dir` instantiates with `paths:
    /// "{path}/Cargo.toml"` (purely literal after token
    /// substitution) for every `crates/*` directory; without
    /// the fast path this is the dominant 1M-scale bottleneck.
    literal_paths: Option<Vec<PathBuf>>,
    /// Parser applied to each in-scope file.
    format: Format,
    /// Compiled `JSONPath` query.
    path_expr: JsonPath,
    /// Source text of `path_expr`, kept for the
    /// "produced no match" message.
    path_src: String,
    /// Check applied to every value the query returns.
    op: Op,
    /// When `true`, a `JSONPath` query that produces zero matches
    /// is silently OK. When `false` (default), a zero-match query
    /// is reported as a single violation — the "value being
    /// enforced doesn't exist" case. Use `true` for predicates
    /// that are conditional on the field being present (e.g.
    /// "every `uses:` in a workflow must be SHA-pinned" — a
    /// workflow with no `uses:` at all shouldn't be flagged).
    if_present: bool,
}
207
208impl Rule for StructuredPathRule {
209    fn id(&self) -> &str {
210        &self.id
211    }
212    fn level(&self) -> Level {
213        self.level
214    }
215    fn policy_url(&self) -> Option<&str> {
216        self.policy_url.as_deref()
217    }
218
219    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
220        let mut violations = Vec::new();
221        if let Some(literals) = self.literal_paths.as_ref() {
222            // Fast path: each `paths:` entry is a literal
223            // relative path; we don't need to touch the entry
224            // list at all. `contains_file` is the cheap
225            // membership check; the absolute path comes from
226            // joining `root` with the literal directly.
227            // (`find_file` would re-scan the entries list to
228            // hand back a `&FileEntry`, which we don't need
229            // here — only the bytes — and which would
230            // re-introduce the O(N) work this fast path
231            // exists to avoid.)
232            for literal in literals {
233                if !ctx.index.contains_file(literal) {
234                    continue;
235                }
236                let full = ctx.root.join(literal);
237                let Ok(bytes) = std::fs::read(&full) else {
238                    continue;
239                };
240                violations.extend(self.evaluate_file(ctx, literal, &bytes)?);
241            }
242        } else {
243            for entry in ctx.index.files() {
244                if !self.scope.matches(&entry.path, ctx.index) {
245                    continue;
246                }
247                let full = ctx.root.join(&entry.path);
248                let Ok(bytes) = std::fs::read(&full) else {
249                    // permission / race — silent skip, like other
250                    // content rules
251                    continue;
252                };
253                violations.extend(self.evaluate_file(ctx, &entry.path, &bytes)?);
254            }
255        }
256        Ok(violations)
257    }
258
259    fn as_per_file(&self) -> Option<&dyn PerFileRule> {
260        Some(self)
261    }
262}
263
264impl PerFileRule for StructuredPathRule {
265    fn path_scope(&self) -> &Scope {
266        &self.scope
267    }
268
269    fn evaluate_file(
270        &self,
271        _ctx: &Context<'_>,
272        path: &Path,
273        bytes: &[u8],
274    ) -> Result<Vec<Violation>> {
275        let Ok(text) = std::str::from_utf8(bytes) else {
276            return Ok(Vec::new());
277        };
278        let root_value = match self.format.parse(text) {
279            Ok(v) => v,
280            Err(err) => {
281                return Ok(vec![
282                    Violation::new(format!(
283                        "not a valid {} document: {err}",
284                        self.format.label()
285                    ))
286                    .with_path(std::sync::Arc::<Path>::from(path)),
287                ]);
288            }
289        };
290        let matches = self.path_expr.query(&root_value);
291        if matches.is_empty() {
292            if self.if_present {
293                return Ok(Vec::new());
294            }
295            let msg = self
296                .message
297                .clone()
298                .unwrap_or_else(|| format!("JSONPath `{}` produced no match", self.path_src));
299            return Ok(vec![
300                Violation::new(msg).with_path(std::sync::Arc::<Path>::from(path)),
301            ]);
302        }
303        let mut violations = Vec::new();
304        for m in matches.iter() {
305            if let Some(v) = check_match(m, &self.op) {
306                let base = self.message.clone().unwrap_or(v);
307                violations.push(Violation::new(base).with_path(std::sync::Arc::<Path>::from(path)));
308            }
309        }
310        Ok(violations)
311    }
312}
313
314/// Return `Some(message)` if the match fails the op; `None` if it passes.
315fn check_match(m: &Value, op: &Op) -> Option<String> {
316    match op {
317        Op::Equals(expected) => {
318            if m == expected {
319                None
320            } else {
321                Some(format!(
322                    "value at path does not equal expected: expected {}, got {}",
323                    short_render(expected),
324                    short_render(m),
325                ))
326            }
327        }
328        Op::Matches(re) => {
329            let Some(s) = m.as_str() else {
330                return Some(format!(
331                    "value at path is not a string (got {}), can't apply regex",
332                    kind_name(m)
333                ));
334            };
335            if re.is_match(s) {
336                None
337            } else {
338                Some(format!(
339                    "value at path {} does not match regex {}",
340                    short_render(m),
341                    re.as_str(),
342                ))
343            }
344        }
345    }
346}
347
348/// A stable, short rendering for error messages. Avoids
349/// dumping a whole object when the mismatch is on a sub-key.
350fn short_render(v: &Value) -> String {
351    let raw = v.to_string();
352    if raw.len() <= 80 {
353        raw
354    } else {
355        format!("{}…", &raw[..80])
356    }
357}
358
/// Lower-case name of the JSON kind of `v`, used in the
/// "is not a string (got …)" message.
fn kind_name(v: &Value) -> &'static str {
    match v {
        Value::Null => "null",
        Value::Bool(_) => "bool",
        Value::Number(_) => "number",
        Value::String(_) => "string",
        Value::Array(_) => "array",
        Value::Object(_) => "object",
    }
}
369
370// ---------------------------------------------------------------
371// XML → serde_json::Value
372//
373// xmltodict-style convention so the JSONPath a user writes reads
374// like the XML they see. Full rationale + false-positive surface:
375// `docs/design/v0.10/xml_path.md`.
376// ---------------------------------------------------------------
377
/// Maximum XML element-nesting depth `xml_to_value` will
/// descend. Real config/manifest XML (`.csproj`, `pom.xml`, …)
/// is a handful of levels deep; 256 is far beyond any real
/// manifest yet far below the recursion depth that would
/// overflow the stack. A document nested deeper is rejected as a
/// parse error (one per-file violation via the existing
/// parse-error path) rather than recursed into — a crafted or
/// accidental deeply-nested file must never abort the run. The
/// other formats' parsers carry their own internal recursion
/// limits; this is the XML arm's equivalent.
///
/// The root element counts as depth 0 and the check is
/// `depth >= MAX_XML_DEPTH`, so at most 256 nested element
/// levels (depths 0 through 255) are accepted.
const MAX_XML_DEPTH: usize = 256;
389
390/// Parse XML into the same `serde_json::Value` tree the rest of
391/// the family queries. The document maps to
392/// `{ <root-element-name>: <root value> }` so the root element is
393/// the first `JSONPath` segment (`$.Project…`, `$.project…`).
394fn xml_to_value(text: &str) -> std::result::Result<Value, String> {
395    let doc = roxmltree::Document::parse(text).map_err(|e| e.to_string())?;
396    let root = doc.root_element();
397    let mut obj = serde_json::Map::new();
398    obj.insert(
399        root.tag_name().name().to_owned(),
400        element_to_value(root, 0)?,
401    );
402    Ok(Value::Object(obj))
403}
404
/// One element → its `Value`. Attributes become `@name` keys;
/// repeated child elements of the same (local) name become a JSON
/// array in document order; loose text becomes `#text` when the
/// element also has attributes/children, or *is* the value when
/// the element is a pure leaf. Empty element → `null`. Namespaces
/// are flattened to the local name (Open question 1 in the design
/// doc). `depth` bounds recursion at `MAX_XML_DEPTH`: past the
/// bound it returns `Err` (surfaced as one parse-error violation
/// via the caller) instead of recursing into a stack abort.
///
/// Text is gathered from the element's direct text children,
/// concatenated, and trimmed of surrounding whitespace; text that
/// is empty after trimming is dropped.
fn element_to_value(node: roxmltree::Node, depth: usize) -> std::result::Result<Value, String> {
    // Refuse before doing any work so a pathological document
    // costs at most `MAX_XML_DEPTH` stack frames.
    if depth >= MAX_XML_DEPTH {
        return Err(format!(
            "XML nesting exceeds the maximum supported depth ({MAX_XML_DEPTH})"
        ));
    }
    let mut obj = serde_json::Map::new();
    // Attributes first, keyed `@<name>`.
    for attr in node.attributes() {
        obj.insert(
            format!("@{}", attr.name()),
            Value::String(attr.value().to_owned()),
        );
    }
    let mut has_child_elem = false;
    for child in node.children().filter(roxmltree::Node::is_element) {
        has_child_elem = true;
        let name = child.tag_name().name().to_owned();
        let val = element_to_value(child, depth + 1)?;
        match obj.get_mut(&name) {
            // Third or later occurrence: append to the array.
            Some(Value::Array(arr)) => arr.push(val),
            // Second occurrence: promote the existing single
            // value to a two-element array. This function never
            // returns an array itself, so an array stored here
            // always comes from this promotion.
            Some(slot) => {
                let prev = slot.take();
                *slot = Value::Array(vec![prev, val]);
            }
            // First occurrence: store the value directly.
            None => {
                obj.insert(name, val);
            }
        }
    }
    let text: String = node
        .children()
        .filter(roxmltree::Node::is_text)
        .filter_map(|n| n.text())
        .collect();
    let text = text.trim();
    // Pure leaf (no attributes, no child elements): collapse to
    // the text itself, or `null` when there is none.
    if obj.is_empty() && !has_child_elem {
        return Ok(if text.is_empty() {
            Value::Null
        } else {
            Value::String(text.to_owned())
        });
    }
    // Element with attributes and/or children: keep any text
    // under `#text`.
    if !text.is_empty() {
        obj.insert("#text".to_owned(), Value::String(text.to_owned()));
    }
    Ok(Value::Object(obj))
}
461
462// ---------------------------------------------------------------
463// Builders
464//
465// Eight thin wrappers per (Format, Op) combination. Each consumes
466// the spec, validates the structured-query options, and
467// constructs the shared `StructuredPathRule`.
468// ---------------------------------------------------------------
469
/// Builder for the `json_path_equals` rule kind: delegates to
/// `build_equals` with `Format::Json`.
pub fn json_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
    build_equals(spec, Format::Json, "json_path_equals")
}
473
/// Builder for the `json_path_matches` rule kind: delegates to
/// `build_matches` with `Format::Json`.
pub fn json_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
    build_matches(spec, Format::Json, "json_path_matches")
}
477
/// Builder for the `yaml_path_equals` rule kind: delegates to
/// `build_equals` with `Format::Yaml`.
pub fn yaml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
    build_equals(spec, Format::Yaml, "yaml_path_equals")
}
481
/// Builder for the `yaml_path_matches` rule kind: delegates to
/// `build_matches` with `Format::Yaml`.
pub fn yaml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
    build_matches(spec, Format::Yaml, "yaml_path_matches")
}
485
/// Builder for the `toml_path_equals` rule kind: delegates to
/// `build_equals` with `Format::Toml`.
pub fn toml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
    build_equals(spec, Format::Toml, "toml_path_equals")
}
489
/// Builder for the `toml_path_matches` rule kind: delegates to
/// `build_matches` with `Format::Toml`.
pub fn toml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
    build_matches(spec, Format::Toml, "toml_path_matches")
}
493
/// Builder for the `xml_path_equals` rule kind: delegates to
/// `build_equals` with `Format::Xml`.
pub fn xml_path_equals_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
    build_equals(spec, Format::Xml, "xml_path_equals")
}
497
/// Builder for the `xml_path_matches` rule kind: delegates to
/// `build_matches` with `Format::Xml`.
pub fn xml_path_matches_build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
    build_matches(spec, Format::Xml, "xml_path_matches")
}
501
502fn build_equals(spec: &RuleSpec, format: Format, kind_label: &str) -> Result<Box<dyn Rule>> {
503    let paths = spec.paths.as_ref().ok_or_else(|| {
504        Error::rule_config(&spec.id, format!("{kind_label} requires a `paths` field"))
505    })?;
506    let opts: EqualsOptions = spec
507        .deserialize_options()
508        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
509    let path_expr = JsonPath::parse(&opts.path).map_err(|e| {
510        Error::rule_config(
511            &spec.id,
512            alint_core::jsonpath_diagnostics::format_parse_error(&opts.path, e),
513        )
514    })?;
515    Ok(Box::new(StructuredPathRule {
516        id: spec.id.clone(),
517        level: spec.level,
518        policy_url: spec.policy_url.clone(),
519        message: spec.message.clone(),
520        scope: Scope::from_spec(spec)?,
521        literal_paths: extract_literal_paths(paths),
522        format,
523        path_expr,
524        path_src: opts.path,
525        op: Op::Equals(opts.equals),
526        if_present: opts.if_present,
527    }))
528}
529
530fn build_matches(spec: &RuleSpec, format: Format, kind_label: &str) -> Result<Box<dyn Rule>> {
531    let paths = spec.paths.as_ref().ok_or_else(|| {
532        Error::rule_config(&spec.id, format!("{kind_label} requires a `paths` field"))
533    })?;
534    let opts: MatchesOptions = spec
535        .deserialize_options()
536        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
537    let path_expr = JsonPath::parse(&opts.path).map_err(|e| {
538        Error::rule_config(
539            &spec.id,
540            alint_core::jsonpath_diagnostics::format_parse_error(&opts.path, e),
541        )
542    })?;
543    let re = Regex::new(&opts.matches).map_err(|e| {
544        Error::rule_config(&spec.id, format!("invalid regex {:?}: {e}", opts.matches))
545    })?;
546    Ok(Box::new(StructuredPathRule {
547        id: spec.id.clone(),
548        level: spec.level,
549        policy_url: spec.policy_url.clone(),
550        message: spec.message.clone(),
551        scope: Scope::from_spec(spec)?,
552        literal_paths: extract_literal_paths(paths),
553        format,
554        path_expr,
555        path_src: opts.path,
556        op: Op::Matches(re),
557        if_present: opts.if_present,
558    }))
559}
560
561#[cfg(test)]
562mod tests {
563    use super::*;
564    use crate::test_support::{ctx, spec_yaml, tempdir_with_files};
565
566    // ─── build-path errors ────────────────────────────────────
567
568    #[test]
569    fn build_rejects_missing_paths() {
570        let spec = spec_yaml(
571            "id: t\n\
572             kind: json_path_equals\n\
573             path: \"$.name\"\n\
574             equals: \"x\"\n\
575             level: error\n",
576        );
577        assert!(json_path_equals_build(&spec).is_err());
578    }
579
580    #[test]
581    fn build_rejects_invalid_jsonpath() {
582        let spec = spec_yaml(
583            "id: t\n\
584             kind: json_path_equals\n\
585             paths: \"package.json\"\n\
586             path: \"$..[invalid\"\n\
587             equals: \"x\"\n\
588             level: error\n",
589        );
590        assert!(json_path_equals_build(&spec).is_err());
591    }
592
593    #[test]
594    fn build_rejects_invalid_regex_in_matches() {
595        let spec = spec_yaml(
596            "id: t\n\
597             kind: json_path_matches\n\
598             paths: \"package.json\"\n\
599             path: \"$.version\"\n\
600             matches: \"[unterminated\"\n\
601             level: error\n",
602        );
603        // Must fail in the regex-compile path (not via
604        // deny_unknown_fields on a typo'd `pattern:` key — the
605        // latent bug this previously had).
606        let e = json_path_matches_build(&spec).unwrap_err().to_string();
607        assert!(e.contains("regex"), "expected a regex error, got: {e}");
608    }
609
610    // ─── json_path_equals ─────────────────────────────────────
611
612    #[test]
613    fn json_path_equals_passes_when_value_matches() {
614        let spec = spec_yaml(
615            "id: t\n\
616             kind: json_path_equals\n\
617             paths: \"package.json\"\n\
618             path: \"$.name\"\n\
619             equals: \"demo\"\n\
620             level: error\n",
621        );
622        let rule = json_path_equals_build(&spec).unwrap();
623        let (tmp, idx) =
624            tempdir_with_files(&[("package.json", br#"{"name":"demo","version":"1.0.0"}"#)]);
625        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
626        assert!(v.is_empty(), "matching value should pass: {v:?}");
627    }
628
629    #[test]
630    fn json_path_equals_fires_on_mismatch() {
631        let spec = spec_yaml(
632            "id: t\n\
633             kind: json_path_equals\n\
634             paths: \"package.json\"\n\
635             path: \"$.name\"\n\
636             equals: \"demo\"\n\
637             level: error\n",
638        );
639        let rule = json_path_equals_build(&spec).unwrap();
640        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"name":"other"}"#)]);
641        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
642        assert_eq!(v.len(), 1);
643    }
644
645    #[test]
646    fn json_path_equals_fires_on_missing_path() {
647        let spec = spec_yaml(
648            "id: t\n\
649             kind: json_path_equals\n\
650             paths: \"package.json\"\n\
651             path: \"$.name\"\n\
652             equals: \"demo\"\n\
653             level: error\n",
654        );
655        let rule = json_path_equals_build(&spec).unwrap();
656        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"1.0"}"#)]);
657        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
658        assert_eq!(v.len(), 1, "missing path should fire");
659    }
660
661    #[test]
662    fn json_path_if_present_silent_on_missing() {
663        // `if_present: true` → missing path is silent.
664        let spec = spec_yaml(
665            "id: t\n\
666             kind: json_path_equals\n\
667             paths: \"package.json\"\n\
668             path: \"$.name\"\n\
669             equals: \"demo\"\n\
670             if_present: true\n\
671             level: error\n",
672        );
673        let rule = json_path_equals_build(&spec).unwrap();
674        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"1.0"}"#)]);
675        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
676        assert!(v.is_empty(), "if_present should silence: {v:?}");
677    }
678
679    // ─── json_path_matches ────────────────────────────────────
680
681    #[test]
682    fn json_path_matches_passes_on_pattern_hit() {
683        let spec = spec_yaml(
684            "id: t\n\
685             kind: json_path_matches\n\
686             paths: \"package.json\"\n\
687             path: \"$.version\"\n\
688             matches: \"^\\\\d+\\\\.\\\\d+\\\\.\\\\d+$\"\n\
689             level: error\n",
690        );
691        let rule = json_path_matches_build(&spec).unwrap();
692        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"1.2.3"}"#)]);
693        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
694        assert!(v.is_empty(), "matching version should pass: {v:?}");
695    }
696
697    #[test]
698    fn json_path_matches_fires_on_pattern_miss() {
699        let spec = spec_yaml(
700            "id: t\n\
701             kind: json_path_matches\n\
702             paths: \"package.json\"\n\
703             path: \"$.version\"\n\
704             matches: \"^\\\\d+\\\\.\\\\d+\\\\.\\\\d+$\"\n\
705             level: error\n",
706        );
707        let rule = json_path_matches_build(&spec).unwrap();
708        let (tmp, idx) = tempdir_with_files(&[("package.json", br#"{"version":"v1.x"}"#)]);
709        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
710        assert_eq!(v.len(), 1);
711    }
712
713    // ─── yaml_path_* ─────────────────────────────────────────
714
715    #[test]
716    fn yaml_path_equals_passes_when_value_matches() {
717        let spec = spec_yaml(
718            "id: t\n\
719             kind: yaml_path_equals\n\
720             paths: \".github/workflows/*.yml\"\n\
721             path: \"$.name\"\n\
722             equals: \"CI\"\n\
723             level: error\n",
724        );
725        let rule = yaml_path_equals_build(&spec).unwrap();
726        let (tmp, idx) = tempdir_with_files(&[(
727            ".github/workflows/ci.yml",
728            b"name: CI\non: push\njobs: {}\n",
729        )]);
730        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
731        assert!(v.is_empty(), "matching name should pass: {v:?}");
732    }
733
    #[test]
    fn yaml_path_matches_uses_bracket_notation_for_dashed_keys() {
        // Exercises bracket-notation member access
        // (`$.runs['using']`). Bracket notation is the form
        // required for dashed YAML keys (`$.foo['dashed-key']`)
        // because the JSONPath dot-form can't parse them.
        // NOTE(review): `using` itself contains no dash, so this
        // covers the bracket syntax but not an actually-dashed
        // key — consider adding a case with one.
        let spec = spec_yaml(
            "id: t\n\
             kind: yaml_path_matches\n\
             paths: \"action.yml\"\n\
             path: \"$.runs['using']\"\n\
             matches: \"^node\\\\d+$\"\n\
             level: error\n",
        );
        let rule = yaml_path_matches_build(&spec).unwrap();
        let (tmp, idx) =
            tempdir_with_files(&[("action.yml", b"runs:\n  using: node20\n  main: index.js\n")]);
        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert!(v.is_empty(), "bracket notation should match: {v:?}");
    }
753
754    // ─── toml_path_* ─────────────────────────────────────────
755
756    #[test]
757    fn toml_path_equals_passes_when_value_matches() {
758        let spec = spec_yaml(
759            "id: t\n\
760             kind: toml_path_equals\n\
761             paths: \"Cargo.toml\"\n\
762             path: \"$.package.edition\"\n\
763             equals: \"2024\"\n\
764             level: error\n",
765        );
766        let rule = toml_path_equals_build(&spec).unwrap();
767        let (tmp, idx) = tempdir_with_files(&[(
768            "Cargo.toml",
769            b"[package]\nname = \"x\"\nedition = \"2024\"\n",
770        )]);
771        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
772        assert!(v.is_empty(), "matching edition should pass: {v:?}");
773    }
774
775    #[test]
776    fn toml_path_matches_fires_on_floating_version() {
777        // Common policy: deps must be tilde-pinned, not bare.
778        let spec = spec_yaml(
779            "id: t\n\
780             kind: toml_path_matches\n\
781             paths: \"Cargo.toml\"\n\
782             path: \"$.dependencies.serde\"\n\
783             matches: \"^[~=]\"\n\
784             level: error\n",
785        );
786        let rule = toml_path_matches_build(&spec).unwrap();
787        let (tmp, idx) = tempdir_with_files(&[(
788            "Cargo.toml",
789            b"[package]\nname = \"x\"\n[dependencies]\nserde = \"1\"\n",
790        )]);
791        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
792        assert_eq!(v.len(), 1, "floating `serde = \"1\"` should fire");
793    }
794
795    // ─── xml_path_* ──────────────────────────────────────────
796
797    #[test]
798    fn xml_path_equals_passes_on_csproj_leaf() {
799        let spec = spec_yaml(
800            "id: t\n\
801             kind: xml_path_equals\n\
802             paths: \"App.csproj\"\n\
803             path: \"$.Project.PropertyGroup.TargetFramework\"\n\
804             equals: \"net8.0\"\n\
805             level: error\n",
806        );
807        let rule = xml_path_equals_build(&spec).unwrap();
808        let (tmp, idx) = tempdir_with_files(&[(
809            "App.csproj",
810            br#"<Project Sdk="Microsoft.NET.Sdk"><PropertyGroup><TargetFramework>net8.0</TargetFramework></PropertyGroup></Project>"#,
811        )]);
812        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
813        assert!(v.is_empty(), "leaf element should match: {v:?}");
814    }
815
816    #[test]
817    fn xml_path_equals_fires_on_csproj_mismatch() {
818        let spec = spec_yaml(
819            "id: t\n\
820             kind: xml_path_equals\n\
821             paths: \"App.csproj\"\n\
822             path: \"$.Project.PropertyGroup.TargetFramework\"\n\
823             equals: \"net8.0\"\n\
824             level: error\n",
825        );
826        let rule = xml_path_equals_build(&spec).unwrap();
827        let (tmp, idx) = tempdir_with_files(&[(
828            "App.csproj",
829            br"<Project><PropertyGroup><TargetFramework>net6.0</TargetFramework></PropertyGroup></Project>",
830        )]);
831        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
832        assert_eq!(v.len(), 1);
833    }
834
835    #[test]
836    fn xml_path_matches_on_packageref_attribute_array() {
837        // Repeated <PackageReference> → array; `@Version`
838        // attribute reached via bracket notation; every match
839        // must be a non-empty version-ish string.
840        let spec = spec_yaml(
841            "id: t\n\
842             kind: xml_path_matches\n\
843             paths: \"App.csproj\"\n\
844             path: \"$.Project.ItemGroup.PackageReference[*]['@Version']\"\n\
845             matches: \"^\\\\d\"\n\
846             level: error\n",
847        );
848        let rule = xml_path_matches_build(&spec).unwrap();
849        let (tmp, idx) = tempdir_with_files(&[(
850            "App.csproj",
851            br#"<Project><ItemGroup><PackageReference Include="A" Version="1.2.3"/><PackageReference Include="B" Version="4.0.0"/></ItemGroup></Project>"#,
852        )]);
853        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
854        assert!(v.is_empty(), "both @Version attrs should match: {v:?}");
855    }
856
857    #[test]
858    fn xml_pom_namespace_flattened_and_repeated_dependency_array() {
859        // Maven default namespace must not leak into the query;
860        // repeated <dependency> must be an array.
861        let pom = br#"<project xmlns="http://maven.apache.org/POM/4.0.0"><modelVersion>4.0.0</modelVersion><dependencies><dependency><artifactId>guava</artifactId></dependency><dependency><artifactId>junit</artifactId></dependency></dependencies></project>"#;
862        let eq = spec_yaml(
863            "id: t\n\
864             kind: xml_path_equals\n\
865             paths: \"pom.xml\"\n\
866             path: \"$.project.modelVersion\"\n\
867             equals: \"4.0.0\"\n\
868             level: error\n",
869        );
870        let (tmp, idx) = tempdir_with_files(&[("pom.xml", pom)]);
871        assert!(
872            xml_path_equals_build(&eq)
873                .unwrap()
874                .evaluate(&ctx(tmp.path(), &idx))
875                .unwrap()
876                .is_empty(),
877            "namespace-flattened modelVersion should match"
878        );
879        let m = spec_yaml(
880            "id: t\n\
881             kind: xml_path_matches\n\
882             paths: \"pom.xml\"\n\
883             path: \"$.project.dependencies.dependency[*].artifactId\"\n\
884             matches: \"^[a-z]+$\"\n\
885             level: error\n",
886        );
887        let v = xml_path_matches_build(&m)
888            .unwrap()
889            .evaluate(&ctx(tmp.path(), &idx))
890            .unwrap();
891        assert!(v.is_empty(), "both deps' artifactId should match: {v:?}");
892    }
893
894    #[test]
895    fn xml_path_if_present_silences_missing() {
896        let spec = spec_yaml(
897            "id: t\n\
898             kind: xml_path_equals\n\
899             paths: \"App.csproj\"\n\
900             path: \"$.Project.PropertyGroup.Nullable\"\n\
901             equals: \"enable\"\n\
902             if_present: true\n\
903             level: error\n",
904        );
905        let rule = xml_path_equals_build(&spec).unwrap();
906        let (tmp, idx) = tempdir_with_files(&[(
907            "App.csproj",
908            br"<Project><PropertyGroup><TargetFramework>net8.0</TargetFramework></PropertyGroup></Project>",
909        )]);
910        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
911        assert!(v.is_empty(), "if_present should silence missing: {v:?}");
912    }
913
914    #[test]
915    fn xml_malformed_fires_one_violation() {
916        let spec = spec_yaml(
917            "id: t\n\
918             kind: xml_path_equals\n\
919             paths: \"App.csproj\"\n\
920             path: \"$.Project\"\n\
921             equals: \"x\"\n\
922             level: error\n",
923        );
924        let rule = xml_path_equals_build(&spec).unwrap();
925        let (tmp, idx) = tempdir_with_files(&[("App.csproj", b"<Project><Unclosed></Project>")]);
926        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
927        assert_eq!(v.len(), 1, "not-well-formed XML should fire once");
928        assert!(v[0].message.contains("XML"), "{:?}", v[0].message);
929    }
930
931    #[test]
932    fn xml_deeply_nested_is_a_parse_error_not_an_abort() {
933        // P1 regression: unbounded recursion would `abort()` the
934        // whole process. The `MAX_XML_DEPTH` guard must instead
935        // yield exactly one ordinary parse-error violation for
936        // the file (no panic, no abort, per-file contained).
937        let depth = MAX_XML_DEPTH + 50;
938        let xml = format!("{}deep{}", "<a>".repeat(depth), "</a>".repeat(depth));
939        let spec = spec_yaml(
940            "id: t\n\
941             kind: xml_path_equals\n\
942             paths: \"deep.xml\"\n\
943             path: \"$.a\"\n\
944             equals: \"x\"\n\
945             level: error\n",
946        );
947        let rule = xml_path_equals_build(&spec).unwrap();
948        let (tmp, idx) = tempdir_with_files(&[("deep.xml", xml.as_bytes())]);
949        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
950        assert_eq!(
951            v.len(),
952            1,
953            "deeply-nested XML must yield exactly one parse-error violation: {v:?}"
954        );
955        assert!(
956            v[0].message.contains("not a valid XML") && v[0].message.contains("depth"),
957            "expected a depth parse-error message, got: {}",
958            v[0].message
959        );
960    }
961
962    #[test]
963    fn xml_leaf_values_are_string_typed() {
964        // Documented gotcha: every XML leaf is a string. A
965        // quoted `equals: "8"` matches; a bare `equals: 8`
966        // (a YAML integer) does not.
967        let xml: &[u8] = b"<Config><n>8</n></Config>";
968        let as_str = spec_yaml(
969            "id: t\n\
970             kind: xml_path_equals\n\
971             paths: \"c.xml\"\n\
972             path: \"$.Config.n\"\n\
973             equals: \"8\"\n\
974             level: error\n",
975        );
976        let (tmp, idx) = tempdir_with_files(&[("c.xml", xml)]);
977        assert!(
978            xml_path_equals_build(&as_str)
979                .unwrap()
980                .evaluate(&ctx(tmp.path(), &idx))
981                .unwrap()
982                .is_empty(),
983            "string 8 should match the string-typed leaf"
984        );
985        let as_int = spec_yaml(
986            "id: t\n\
987             kind: xml_path_equals\n\
988             paths: \"c.xml\"\n\
989             path: \"$.Config.n\"\n\
990             equals: 8\n\
991             level: error\n",
992        );
993        let v = xml_path_equals_build(&as_int)
994            .unwrap()
995            .evaluate(&ctx(tmp.path(), &idx))
996            .unwrap();
997        assert_eq!(v.len(), 1, "integer 8 must NOT equal string \"8\"");
998    }
999
1000    #[test]
1001    fn xml_empty_element_is_null() {
1002        // Design-doc promise (was untested): an empty element
1003        // maps to JSON null — `equals: null` matches; `equals:
1004        // ""` does not (it is null, not an empty string).
1005        let xml: &[u8] = b"<Config><empty/></Config>";
1006        let (tmp, idx) = tempdir_with_files(&[("c.xml", xml)]);
1007        let as_null = spec_yaml(
1008            "id: t\nkind: xml_path_equals\npaths: \"c.xml\"\n\
1009             path: \"$.Config.empty\"\nequals: null\nlevel: error\n",
1010        );
1011        assert!(
1012            xml_path_equals_build(&as_null)
1013                .unwrap()
1014                .evaluate(&ctx(tmp.path(), &idx))
1015                .unwrap()
1016                .is_empty(),
1017            "an empty element must equal null"
1018        );
1019        let as_empty_str = spec_yaml(
1020            "id: t\nkind: xml_path_equals\npaths: \"c.xml\"\n\
1021             path: \"$.Config.empty\"\nequals: \"\"\nlevel: error\n",
1022        );
1023        assert_eq!(
1024            xml_path_equals_build(&as_empty_str)
1025                .unwrap()
1026                .evaluate(&ctx(tmp.path(), &idx))
1027                .unwrap()
1028                .len(),
1029            1,
1030            "null must NOT equal the empty string"
1031        );
1032    }
1033
1034    // ─── parse error path ─────────────────────────────────────
1035
1036    #[test]
1037    fn evaluate_fires_on_malformed_input() {
1038        let spec = spec_yaml(
1039            "id: t\n\
1040             kind: json_path_equals\n\
1041             paths: \"package.json\"\n\
1042             path: \"$.name\"\n\
1043             equals: \"x\"\n\
1044             level: error\n",
1045        );
1046        let rule = json_path_equals_build(&spec).unwrap();
1047        let (tmp, idx) = tempdir_with_files(&[("package.json", b"{not valid json")]);
1048        let v = rule.evaluate(&ctx(tmp.path(), &idx)).unwrap();
1049        assert_eq!(v.len(), 1, "malformed JSON should fire one violation");
1050    }
1051}