Skip to main content

alint_rules/
cross_file_value_equals.rs

1//! `cross_file_value_equals` — a value extracted from one
2//! authoritative file must equal a value extracted from one or
3//! more other files. The cross-file value-coherence sibling of
4//! `registry_paths_resolve` (path existence); shares
5//! `crate::extract`. Design + open-question resolutions:
6//! `docs/design/v0.10/cross_file_value_equals.md`.
7//!
8//! ```yaml
9//! - id: workspace-versions-coherent
10//!   kind: cross_file_value_equals
11//!   source:
12//!     file: Cargo.toml
13//!     extract: { toml: "$.workspace.package.version" }
14//!   targets:                       # form (a): glob + one extract
15//!     files: "crates/*/Cargo.toml"
16//!     extract: { toml: "$.package.version" }
17//!   # OR form (b): an explicit heterogeneous list
18//!   # targets:
19//!   #   - { file: rust-toolchain.toml, extract: { toml: "$.toolchain.channel" } }
20//!   #   - { file: Dockerfile,          extract: { regex: "FROM rust:(\\S+)" } }
21//!   normalize: none                # none (default) | trim | lower | semver-major
22//!   allow_missing_target: false
23//!   level: error
24//! ```
25
26use std::path::Path;
27
28use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
29use serde::Deserialize;
30
31use crate::extract::{Extract, ExtractSpec, extract_values, is_non_literal};
32
/// The `source:` section of the rule options: the single authoritative
/// file plus the query that pulls the canonical value out of it.
/// Unknown keys are rejected at deserialization time.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct SourceSpec {
    /// Path of the authoritative file; `build` rejects a blank value.
    file: String,
    /// Unresolved extract query; `build` turns it into an `Extract`.
    extract: ExtractSpec,
}
39
/// One entry of the explicit `targets:` list (form b): a single file
/// paired with its own extract query. Unknown keys are rejected at
/// deserialization time.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct TargetEntrySpec {
    /// Path of the target file; `build` rejects a blank value.
    file: String,
    /// Unresolved extract query for this specific target.
    extract: ExtractSpec,
}
46
/// `targets:` is either a `{ files: <glob>, extract: … }` map
/// (form a — one query applied per glob match, the istio
/// `value_extractor:` / pitfall-#20 shape) or a sequence of
/// `{ file, extract }` (form b — heterogeneous pins). A YAML map
/// vs a sequence are structurally distinct, so an untagged enum
/// decodes them unambiguously (unlike the externally-tagged-enum
/// trap `crate::extract` documents).
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum TargetsSpec {
    /// Form (a): one glob pattern and one extract query shared by every
    /// file the glob matches.
    Glob { files: String, extract: ExtractSpec },
    /// Form (b): an explicit list where each file carries its own query.
    List(Vec<TargetEntrySpec>),
}
60
/// Normalization applied to the source value and to every target value
/// before the two are compared. Deserialized from kebab-case names
/// (`none`, `trim`, `lower`, `semver-major`); defaults to `None`.
#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
enum Normalize {
    /// Compare the values exactly as extracted.
    #[default]
    None,
    /// Strip surrounding whitespace before comparing.
    Trim,
    /// Strip surrounding whitespace and lowercase before comparing.
    Lower,
    /// Compare only the leading `MAJOR` token (the dotnet/runtime
    /// SDK-band shape: same feature band, not exact patch).
    SemverMajor,
}
72
73impl Normalize {
74    fn apply(self, v: &str) -> String {
75        match self {
76            Self::None => v.to_string(),
77            Self::Trim => v.trim().to_string(),
78            Self::Lower => v.trim().to_lowercase(),
79            Self::SemverMajor => v
80                .trim()
81                .split('.')
82                .next()
83                .unwrap_or("")
84                .trim_start_matches(|c: char| !c.is_ascii_digit())
85                .to_string(),
86        }
87    }
88}
89
/// The rule's `kind`-specific options as written in the config file.
/// Unknown keys are rejected at deserialization time.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// The authoritative file and query.
    source: SourceSpec,
    /// The files whose values must equal the source value.
    targets: TargetsSpec,
    /// How values are normalized before comparison; `none` when omitted.
    #[serde(default)]
    normalize: Normalize,
    /// When `true`, a missing/unreadable target, a target whose query
    /// yields no literal value, and a glob that matches no files are not
    /// reported. `false` when omitted.
    #[serde(default)]
    allow_missing_target: bool,
}
100
/// Resolved target shape.
///
/// The runtime counterpart of `TargetsSpec`, produced by `build` once the
/// glob has been compiled and every extract query resolved.
#[derive(Debug)]
enum Targets {
    /// Every indexed file matched by `scope` is checked with `extract`.
    Glob { scope: Scope, extract: Extract },
    /// Explicit `(file path, extract)` pairs, checked in order.
    List(Vec<(String, Extract)>),
}
107
/// Rule that requires a value extracted from one authoritative file to
/// equal the value(s) extracted from one or more target files.
/// Constructed by `build` from a `RuleSpec`.
#[derive(Debug)]
pub struct CrossFileValueEqualsRule {
    // `id`, `level` and `policy_url` are copied from the `RuleSpec`;
    // presumably consumed by `rule_common_impl!` — confirm against the macro.
    id: String,
    level: Level,
    policy_url: Option<String>,
    /// Optional custom text that replaces the default mismatch message.
    message: Option<String>,
    /// Path of the authoritative file, relative to the lint root.
    source_file: String,
    /// Resolved query applied to the source file.
    source_extract: Extract,
    /// Resolved targets (glob or explicit list).
    targets: Targets,
    /// Normalization applied to both sides before comparing.
    normalize: Normalize,
    /// Mirrors the `allow_missing_target` option.
    allow_missing: bool,
}
120
121impl Rule for CrossFileValueEqualsRule {
122    alint_core::rule_common_impl!();
123
124    fn requires_full_index(&self) -> bool {
125        // Cross-file: the source and every target may live
126        // anywhere in the tree; never `--changed`-scoped.
127        true
128    }
129
130    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
131        let mut out = Vec::new();
132        let Some(source) = self.resolve_source(ctx, &mut out) else {
133            return Ok(out);
134        };
135        let source_norm = self.normalize.apply(&source);
136
137        match &self.targets {
138            Targets::Glob { scope, extract } => {
139                let mut matched = 0usize;
140                for e in ctx.index.files() {
141                    if !scope.matches(&e.path, ctx.index) {
142                        continue;
143                    }
144                    matched += 1;
145                    self.check_target(ctx, &e.path, extract, &source, &source_norm, &mut out);
146                }
147                if matched == 0 && !self.allow_missing {
148                    out.push(Self::violation(
149                        Path::new(&self.source_file),
150                        &format!("targets glob matched no files (source value {source:?})"),
151                    ));
152                }
153            }
154            Targets::List(list) => {
155                for (file, extract) in list {
156                    self.check_target(
157                        ctx,
158                        Path::new(file),
159                        extract,
160                        &source,
161                        &source_norm,
162                        &mut out,
163                    );
164                }
165            }
166        }
167        Ok(out)
168    }
169}
170
171impl CrossFileValueEqualsRule {
172    /// Read + extract the single authoritative source value.
173    /// `None` (with a violation pushed) when it can't be resolved.
174    fn resolve_source(&self, ctx: &Context<'_>, out: &mut Vec<Violation>) -> Option<String> {
175        let src = Path::new(&self.source_file);
176        let text = match read_rel(ctx, src) {
177            Ok(t) => t,
178            Err(crate::io::ReadCapError::TooLarge(n)) => {
179                out.push(Self::violation(
180                    src,
181                    &format!("source file is too large to analyze ({n} bytes; 256 MiB cap)"),
182                ));
183                return None;
184            }
185            Err(crate::io::ReadCapError::Io(e)) => {
186                out.push(Self::violation(
187                    src,
188                    &format!("source file is unreadable: {e}"),
189                ));
190                return None;
191            }
192        };
193        let values = match extract_values(&self.source_extract, &text) {
194            Ok(v) => v,
195            Err(e) => {
196                out.push(Self::violation(src, &format!("source extract failed: {e}")));
197                return None;
198            }
199        };
200        let literal: Vec<String> = values.into_iter().filter(|v| !is_non_literal(v)).collect();
201        match literal.as_slice() {
202            [one] => Some(one.clone()),
203            [] => {
204                out.push(Self::violation(
205                    src,
206                    "canonical value not found (the source query matched no literal value)",
207                ));
208                None
209            }
210            _ => {
211                out.push(Self::violation(
212                    src,
213                    "source must resolve to exactly one value (the query matched several)",
214                ));
215                None
216            }
217        }
218    }
219
220    fn check_target(
221        &self,
222        ctx: &Context<'_>,
223        target: &Path,
224        extract: &Extract,
225        source: &str,
226        source_norm: &str,
227        out: &mut Vec<Violation>,
228    ) {
229        let text = match read_rel(ctx, target) {
230            Ok(t) => t,
231            Err(crate::io::ReadCapError::TooLarge(n)) => {
232                // A too-large target is always a violation — never
233                // suppressed by `allow_missing` (it is present,
234                // just unanalysable).
235                out.push(Self::violation(
236                    target,
237                    &format!("target file is too large to analyze ({n} bytes; 256 MiB cap)"),
238                ));
239                return;
240            }
241            Err(crate::io::ReadCapError::Io(_)) => {
242                if !self.allow_missing {
243                    out.push(Self::violation(
244                        target,
245                        "target file is missing or unreadable",
246                    ));
247                }
248                return;
249            }
250        };
251        let values = match extract_values(extract, &text) {
252            Ok(v) => v,
253            Err(e) => {
254                out.push(Self::violation(
255                    target,
256                    &format!("target extract failed: {e}"),
257                ));
258                return;
259            }
260        };
261        let literal: Vec<&String> = values.iter().filter(|v| !is_non_literal(v)).collect();
262        if literal.is_empty() {
263            if !self.allow_missing {
264                out.push(Self::violation(
265                    target,
266                    "no literal value to compare (the target query matched nothing)",
267                ));
268            }
269            return;
270        }
271        for value in literal {
272            if self.normalize.apply(value) != source_norm {
273                out.push(self.mismatch(target, source, value));
274            }
275        }
276    }
277
278    fn violation(path: &Path, reason: &str) -> Violation {
279        Violation::new(format!("{}: {reason}", path.display())).with_path(path.to_path_buf())
280    }
281
282    fn mismatch(&self, target: &Path, source: &str, target_value: &str) -> Violation {
283        let msg = self.message.clone().unwrap_or_else(|| {
284            format!(
285                "{} value {target_value:?} != {} value {source:?}",
286                target.display(),
287                self.source_file,
288            )
289        });
290        Violation::new(msg).with_path(target.to_path_buf())
291    }
292}
293
294/// Read a tree-relative path as text (the index stores paths, not
295/// contents, so the cross-file rules read the file themselves).
296fn read_rel(ctx: &Context<'_>, rel: &Path) -> Result<String, crate::io::ReadCapError> {
297    crate::io::read_capped(&ctx.root.join(rel)).map(|b| String::from_utf8_lossy(&b).into_owned())
298}
299
300pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
301    alint_core::reject_scope_filter_on_cross_file(spec, "cross_file_value_equals")?;
302    let opts: Options = spec
303        .deserialize_options()
304        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
305
306    let cfg = |msg: String| Error::rule_config(&spec.id, msg);
307
308    if opts.source.file.trim().is_empty() {
309        return Err(cfg("`source.file` must not be empty".into()));
310    }
311    let source_extract = opts
312        .source
313        .extract
314        .resolve()
315        .map_err(|e| cfg(format!("invalid `source.extract`: {e}")))?;
316
317    let targets = match opts.targets {
318        TargetsSpec::Glob { files, extract } => {
319            if files.trim().is_empty() {
320                return Err(cfg("`targets.files` must not be empty".into()));
321            }
322            let scope = Scope::from_patterns(std::slice::from_ref(&files))
323                .map_err(|e| cfg(format!("invalid `targets.files` glob: {e}")))?;
324            Targets::Glob {
325                scope,
326                extract: extract
327                    .resolve()
328                    .map_err(|e| cfg(format!("invalid `targets.extract`: {e}")))?,
329            }
330        }
331        TargetsSpec::List(list) => {
332            if list.is_empty() {
333                return Err(cfg("`targets` list must not be empty".into()));
334            }
335            let mut resolved = Vec::with_capacity(list.len());
336            for (i, t) in list.into_iter().enumerate() {
337                if t.file.trim().is_empty() {
338                    return Err(cfg(format!("`targets[{i}].file` must not be empty")));
339                }
340                let ex = t
341                    .extract
342                    .resolve()
343                    .map_err(|e| cfg(format!("invalid `targets[{i}].extract`: {e}")))?;
344                resolved.push((t.file, ex));
345            }
346            Targets::List(resolved)
347        }
348    };
349
350    Ok(Box::new(CrossFileValueEqualsRule {
351        id: spec.id.clone(),
352        level: spec.level,
353        policy_url: spec.policy_url.clone(),
354        message: spec.message.clone(),
355        source_file: opts.source.file,
356        source_extract,
357        targets,
358        normalize: opts.normalize,
359        allow_missing: opts.allow_missing_target,
360    }))
361}
362
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex};

    /// Builds an index of plain one-byte file entries for the given
    /// relative paths.
    fn index(files: &[&str]) -> FileIndex {
        FileIndex::from_entries(
            files
                .iter()
                .map(|p| FileEntry {
                    path: Path::new(p).into(),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Constructs a rule directly (bypassing `build`) with
    /// `allow_missing` off and no custom message.
    fn rule(
        source_file: &str,
        source: Extract,
        targets: Targets,
        normalize: Normalize,
    ) -> CrossFileValueEqualsRule {
        CrossFileValueEqualsRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            source_file: source_file.into(),
            source_extract: source,
            targets,
            normalize,
            allow_missing: false,
        }
    }

    /// Evaluates `r` against `root` / `idx` with an otherwise empty
    /// context and returns the violations.
    fn eval(r: &CrossFileValueEqualsRule, root: &Path, idx: &FileIndex) -> Vec<Violation> {
        let ctx = Context {
            root,
            index: idx,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        r.evaluate(&ctx).unwrap()
    }

    #[test]
    fn glob_targets_pass_and_fail_on_version_lockstep() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        std::fs::write(
            root.join("Cargo.toml"),
            "[workspace.package]\nversion = \"1.4.0\"\n",
        )
        .unwrap();
        std::fs::create_dir_all(root.join("crates/a")).unwrap();
        std::fs::create_dir_all(root.join("crates/b")).unwrap();
        std::fs::write(
            root.join("crates/a/Cargo.toml"),
            "[package]\nversion = \"1.4.0\"\n",
        )
        .unwrap();
        std::fs::write(
            root.join("crates/b/Cargo.toml"),
            "[package]\nversion = \"1.3.0\"\n",
        )
        .unwrap();
        let idx = index(&["Cargo.toml", "crates/a/Cargo.toml", "crates/b/Cargo.toml"]);
        let r = rule(
            "Cargo.toml",
            Extract::Toml("$.workspace.package.version".into()),
            Targets::Glob {
                scope: Scope::from_patterns(&["crates/*/Cargo.toml".to_string()]).unwrap(),
                extract: Extract::Toml("$.package.version".into()),
            },
            Normalize::None,
        );
        let v = eval(&r, root, &idx);
        assert_eq!(v.len(), 1, "only crates/b drifts: {v:?}");
        assert!(v[0].message.contains("crates/b/Cargo.toml"));
        assert!(v[0].message.contains("1.3.0"));
    }

    #[test]
    fn explicit_list_heterogeneous_targets() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        std::fs::write(
            root.join("rust-toolchain.toml"),
            "[toolchain]\nchannel = \"1.85\"\n",
        )
        .unwrap();
        std::fs::write(root.join("VERSION"), "1.85\n").unwrap();
        std::fs::write(root.join("Bad"), "1.84\n").unwrap();
        let idx = index(&["rust-toolchain.toml", "VERSION", "Bad"]);
        let r = rule(
            "rust-toolchain.toml",
            Extract::Toml("$.toolchain.channel".into()),
            Targets::List(vec![
                (
                    "VERSION".into(),
                    Extract::Lines(crate::extract::LinesOpts::default()),
                ),
                (
                    "Bad".into(),
                    Extract::Lines(crate::extract::LinesOpts::default()),
                ),
            ]),
            Normalize::Trim,
        );
        let v = eval(&r, root, &idx);
        assert_eq!(v.len(), 1, "only Bad drifts: {v:?}");
        assert!(v[0].message.contains("Bad"));
    }

    #[test]
    fn semver_major_normalize_allows_band() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        std::fs::write(
            root.join("global.json"),
            "{\"sdk\":{\"version\":\"8.0.402\"}}",
        )
        .unwrap();
        std::fs::write(root.join("Directory.Build.props"), "8.0.100\n").unwrap();
        let idx = index(&["global.json", "Directory.Build.props"]);
        let r = rule(
            "global.json",
            Extract::Json("$.sdk.version".into()),
            Targets::List(vec![(
                "Directory.Build.props".into(),
                Extract::Lines(crate::extract::LinesOpts::default()),
            )]),
            Normalize::SemverMajor,
        );
        // 8.0.402 vs 8.0.100 — same major band, no violation.
        assert!(eval(&r, root, &idx).is_empty());
    }

    #[test]
    fn lower_normalize_makes_case_insensitive() {
        // Design-doc normalize matrix: `lower` was untested.
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        std::fs::write(root.join("src.txt"), "ABC\n").unwrap();
        std::fs::write(root.join("tgt.txt"), "abc\n").unwrap();
        let idx = index(&["src.txt", "tgt.txt"]);
        let mk = |n| {
            rule(
                "src.txt",
                Extract::Lines(crate::extract::LinesOpts::default()),
                Targets::List(vec![(
                    "tgt.txt".into(),
                    Extract::Lines(crate::extract::LinesOpts::default()),
                )]),
                n,
            )
        };
        assert_eq!(
            eval(&mk(Normalize::None), root, &idx).len(),
            1,
            "ABC vs abc differ under None"
        );
        assert!(
            eval(&mk(Normalize::Lower), root, &idx).is_empty(),
            "lower normalize makes the compare case-insensitive"
        );
    }

    #[test]
    fn multi_value_source_is_an_error() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        std::fs::write(root.join("m.json"), "{\"v\":[\"1\",\"2\"]}").unwrap();
        let idx = index(&["m.json"]);
        let r = rule(
            "m.json",
            Extract::Json("$.v[*]".into()),
            Targets::List(vec![("m.json".into(), Extract::Json("$.v[0]".into()))]),
            Normalize::None,
        );
        let v = eval(&r, root, &idx);
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("exactly one value"));
    }

    #[test]
    fn non_literal_target_value_is_skipped() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();
        std::fs::write(root.join("src.toml"), "v = \"1.0.0\"\n").unwrap();
        std::fs::write(root.join("t.toml"), "v = \"${VERSION}\"\n").unwrap();
        let idx = index(&["src.toml", "t.toml"]);
        let mut r = rule(
            "src.toml",
            Extract::Toml("$.v".into()),
            Targets::List(vec![("t.toml".into(), Extract::Toml("$.v".into()))]),
            Normalize::None,
        );
        // The only target value is interpolated, so it is never reported
        // as a mismatch. With nothing literal left to compare, the strict
        // default reports "no literal value" instead.
        let strict = eval(&r, root, &idx);
        assert_eq!(strict.len(), 1, "non-literal is not a mismatch: {strict:?}");
        assert!(strict[0].message.contains("no literal value"));

        // Allowing missing targets suppresses that report as well.
        r.allow_missing = true;
        assert!(eval(&r, root, &idx).is_empty(), "non-literal must skip");
    }
}