Skip to main content

alint_rules/
pair_hash.rs

1//! `pair_hash` — a target file must carry the digest of a source
2//! file.
3//!
4//! The `algorithm` digest of every file matching `source` must
5//! appear in the single `target` — either as an embedded hex
6//! substring (`contains`) or a coreutils / go-`.sum`-style
7//! `<hex>  <path>` manifest line (`sums-line`). Cross-file rule
8//! (the `pair` dispatch class). alint never rewrites the manifest
9//! (detection-only, like `file_hash`). Design + open-question
10//! resolutions: `docs/design/v0.10/pair_hash.md`.
11//!
12//! ```yaml
13//! - id: fips-sum-pins-module
14//!   kind: pair_hash
15//!   source: "src/crypto/internal/fips140/v1.0.0/**/*.go"
16//!   target: "src/crypto/internal/fips140/fips140.sum"
17//!   algorithm: sha256          # sha256 (default) | sha512
18//!   format: sums-line          # contains (default) | sums-line
19//!   level: error
20//! ```
21
22use std::path::Path;
23
24use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
25use serde::Deserialize;
26use sha2::{Digest, Sha256, Sha512};
27
28#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
29#[serde(rename_all = "lowercase")]
30enum Algorithm {
31    #[default]
32    Sha256,
33    Sha512,
34}
35
36impl Algorithm {
37    /// Lowercase hex digest of `bytes`.
38    fn hex(self, bytes: &[u8]) -> String {
39        match self {
40            Self::Sha256 => encode_hex(Sha256::digest(bytes).as_slice()),
41            Self::Sha512 => encode_hex(Sha512::digest(bytes).as_slice()),
42        }
43    }
44
45    fn label(self) -> &'static str {
46        match self {
47            Self::Sha256 => "sha256",
48            Self::Sha512 => "sha512",
49        }
50    }
51}
52
53#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
54#[serde(rename_all = "kebab-case")]
55enum Format {
56    /// The digest must appear as a substring anywhere in `target`.
57    #[default]
58    Contains,
59    /// `target` must carry a `sha256sum`-style `<hex> [*]<path>`
60    /// line whose path token is the source's path.
61    SumsLine,
62}
63
64#[derive(Debug, Deserialize)]
65#[serde(deny_unknown_fields)]
66struct Options {
67    source: String,
68    /// The single file that must carry the digest (a `.sum` /
69    /// `SHA256SUMS` / a file with an embedded hash).
70    target: String,
71    #[serde(default)]
72    algorithm: Algorithm,
73    #[serde(default)]
74    format: Format,
75}
76
77#[derive(Debug)]
78pub struct PairHashRule {
79    id: String,
80    level: Level,
81    policy_url: Option<String>,
82    message: Option<String>,
83    source_scope: Scope,
84    target: String,
85    algorithm: Algorithm,
86    format: Format,
87}
88
89impl Rule for PairHashRule {
90    alint_core::rule_common_impl!();
91
92    fn requires_full_index(&self) -> bool {
93        // Cross-file: the verdict for a source depends on the
94        // contents of a separate target file, not the diff. Same
95        // dispatch class as `pair` — opts out of `--changed`
96        // filtering; `path_scope` stays `None`.
97        true
98    }
99
100    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
101        let target_path = Path::new(&self.target);
102        let b_bytes = match crate::io::read_capped(&ctx.root.join(target_path)) {
103            Ok(b) => b,
104            Err(crate::io::ReadCapError::TooLarge(n)) => {
105                return Ok(vec![
106                    Violation::new(format!(
107                        "pair_hash target {:?} is too large to analyze \
108                         ({n} bytes; 256 MiB cap)",
109                        self.target
110                    ))
111                    .with_path(std::sync::Arc::<Path>::from(target_path)),
112                ]);
113            }
114            Err(crate::io::ReadCapError::Io(_)) => {
115                let msg = self.message.clone().unwrap_or_else(|| {
116                    format!(
117                        "pair_hash target {:?} does not exist or is unreadable",
118                        self.target
119                    )
120                });
121                return Ok(vec![
122                    Violation::new(msg).with_path(std::sync::Arc::<Path>::from(target_path)),
123                ]);
124            }
125        };
126        let b_text = String::from_utf8_lossy(&b_bytes);
127        let b_lower = b_text.to_ascii_lowercase();
128
129        let mut violations = Vec::new();
130        for entry in ctx.index.files() {
131            if !self.source_scope.matches(&entry.path, ctx.index) {
132                continue;
133            }
134            let a_bytes = match crate::io::read_capped(&ctx.root.join(&entry.path)) {
135                Ok(b) => b,
136                Err(crate::io::ReadCapError::TooLarge(n)) => {
137                    violations.push(
138                        Violation::new(format!(
139                            "{} is too large to hash ({n} bytes; 256 MiB cap)",
140                            entry.path.display()
141                        ))
142                        .with_path(entry.path.clone()),
143                    );
144                    continue;
145                }
146                // permission / race — silent skip, like content rules
147                Err(crate::io::ReadCapError::Io(_)) => continue,
148            };
149            let digest = self.algorithm.hex(&a_bytes);
150            if let Some(desc) = self.check(&entry.path, &digest, &b_text, &b_lower) {
151                let msg = self.message.clone().unwrap_or(desc);
152                violations.push(Violation::new(msg).with_path(entry.path.clone()));
153            }
154        }
155        Ok(violations)
156    }
157}
158
159impl PairHashRule {
160    /// `None` ⇒ the source's digest is properly present in the
161    /// target; `Some(desc)` ⇒ a violation description.
162    fn check(&self, src: &Path, digest: &str, b: &str, b_lower: &str) -> Option<String> {
163        match self.format {
164            Format::Contains => {
165                if b_lower.contains(digest) {
166                    return None;
167                }
168                Some(format!(
169                    "{} of {} ({digest}) not found in {}",
170                    self.algorithm.label(),
171                    src.display(),
172                    self.target,
173                ))
174            }
175            Format::SumsLine => {
176                let want = src.to_string_lossy();
177                for line in b.lines() {
178                    let mut tok = line.split_whitespace();
179                    let (Some(hex), Some(path_tok)) = (tok.next(), tok.next()) else {
180                        continue;
181                    };
182                    // Normalise the coreutils binary-mode `*`
183                    // marker and a `find`-style `./` prefix
184                    // (`<hex>  ./path`, what `find … -exec
185                    // sha256sum` and Go tooling emit) so the
186                    // token compares against the source's
187                    // repo-root-relative index path. Backslash
188                    // separators are not normalised — the `.sum`
189                    // formats in scope are forward-slash.
190                    let path_tok = path_tok.strip_prefix('*').unwrap_or(path_tok);
191                    let path_tok = path_tok.strip_prefix("./").unwrap_or(path_tok);
192                    if path_tok != want {
193                        continue;
194                    }
195                    return if hex.eq_ignore_ascii_case(digest) {
196                        None
197                    } else {
198                        Some(format!(
199                            "{} digest mismatch for {} in {}: manifest has {hex}, \
200                             file hashes to {digest}",
201                            self.algorithm.label(),
202                            src.display(),
203                            self.target,
204                        ))
205                    };
206                }
207                Some(format!(
208                    "{} is not listed in manifest {}",
209                    src.display(),
210                    self.target,
211                ))
212            }
213        }
214    }
215}
216
/// Renders `bytes` as lowercase hexadecimal, two digits per byte.
/// Kept private to this rule (same idea as `file_hash`'s encoder)
/// rather than exported as a crate-wide helper.
fn encode_hex(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
227
228pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
229    alint_core::reject_scope_filter_on_cross_file(spec, "pair_hash")?;
230    let opts: Options = spec
231        .deserialize_options()
232        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
233    if opts.source.trim().is_empty() {
234        return Err(Error::rule_config(
235            &spec.id,
236            "pair_hash `source` must not be empty",
237        ));
238    }
239    if opts.target.trim().is_empty() {
240        return Err(Error::rule_config(
241            &spec.id,
242            "pair_hash `target` (the file that must carry the digest) must not be empty",
243        ));
244    }
245    if spec.fix.is_some() {
246        return Err(Error::rule_config(
247            &spec.id,
248            "pair_hash has no fix op — regenerating a checksum manifest is the \
249             manifest generator's job, not alint's",
250        ));
251    }
252    let source_scope = Scope::from_patterns(std::slice::from_ref(&opts.source))?;
253    Ok(Box::new(PairHashRule {
254        id: spec.id.clone(),
255        level: spec.level,
256        policy_url: spec.policy_url.clone(),
257        message: spec.message.clone(),
258        source_scope,
259        target: opts.target,
260        algorithm: opts.algorithm,
261        format: opts.format,
262    }))
263}
264
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, tempdir_with_files};

    // sha256("hello") — well-known vector.
    const HELLO_SHA256: &str = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824";

    /// Builds a rule directly (bypassing `build`) with an error level
    /// and no custom message, so assertions see generated messages.
    fn rule(source: &str, target: &str, algorithm: Algorithm, format: Format) -> PairHashRule {
        PairHashRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            source_scope: Scope::from_patterns(&[source.to_string()]).unwrap(),
            target: target.into(),
            algorithm,
            format,
        }
    }

    #[test]
    fn sha256_known_vector() {
        assert_eq!(Algorithm::Sha256.hex(b"hello"), HELLO_SHA256);
    }

    #[test]
    fn contains_passes_when_digest_embedded() {
        let manifest = format!("// pinned\nHASH = {HELLO_SHA256}\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("pin.txt", manifest.as_bytes())]);
        let r = rule("a.txt", "pin.txt", Algorithm::Sha256, Format::Contains);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    #[test]
    fn contains_fires_when_digest_absent() {
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("pin.txt", b"nothing relevant here\n")]);
        let r = rule("a.txt", "pin.txt", Algorithm::Sha256, Format::Contains);
        let v = r.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("a.txt")));
        assert!(v[0].message.contains("not found in"));
    }

    #[test]
    fn contains_is_case_insensitive() {
        let manifest = format!("HASH={}\n", HELLO_SHA256.to_ascii_uppercase());
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("pin.txt", manifest.as_bytes())]);
        let r = rule("a.txt", "pin.txt", Algorithm::Sha256, Format::Contains);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    #[test]
    fn sums_line_passes_on_matching_line() {
        let manifest = format!("{HELLO_SHA256}  a.txt\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("SHA256SUMS", manifest.as_bytes())]);
        let r = rule("a.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    #[test]
    fn sums_line_tolerates_binary_marker() {
        let manifest = format!("{HELLO_SHA256} *a.txt\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("SHA256SUMS", manifest.as_bytes())]);
        let r = rule("a.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    #[test]
    fn sums_line_tolerates_dot_slash_prefix() {
        // `find … -exec sha256sum` / Go tooling emit
        // `<hex>  ./path`; the `./` must not cause a false
        // "not listed in manifest" on a correctly-pinned file.
        let manifest = format!("{HELLO_SHA256}  ./a.txt\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("SHA256SUMS", manifest.as_bytes())]);
        let r = rule("a.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        assert!(
            r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty(),
            "a ./-prefixed sums-line path must match the index path"
        );
    }

    #[test]
    fn sha512_sums_line_round_trips() {
        let digest = Algorithm::Sha512.hex(b"hello");
        let manifest = format!("{digest}  a.txt\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("SHA512SUMS", manifest.as_bytes())]);
        let r = rule("a.txt", "SHA512SUMS", Algorithm::Sha512, Format::SumsLine);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    #[test]
    fn sums_line_fires_on_wrong_hash() {
        let bad = "0".repeat(64);
        let manifest = format!("{bad}  a.txt\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("SHA256SUMS", manifest.as_bytes())]);
        let r = rule("a.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        let v = r.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("digest mismatch"));
    }

    #[test]
    fn sums_line_fires_when_path_not_listed() {
        let (tmp, idx) = tempdir_with_files(&[
            ("a.txt", b"hello"),
            ("SHA256SUMS", b"deadbeef  other.txt\n"),
        ]);
        let r = rule("a.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        let v = r.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("not listed in manifest"));
    }

    #[test]
    fn missing_target_is_one_violation_on_target() {
        // An absent target yields a single violation attached to the
        // target path, not one per matching source.
        let (tmp, idx) = tempdir_with_files(&[("a.txt", b"hello")]);
        let r = rule("a.txt", "nope.sum", Algorithm::Sha256, Format::Contains);
        let v = r.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("nope.sum")));
        assert!(v[0].message.contains("does not exist"));
    }

    #[test]
    fn sha512_contains_round_trips() {
        let digest = Algorithm::Sha512.hex(b"hello");
        let manifest = format!("sha512 = {digest}\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("pin.txt", manifest.as_bytes())]);
        let r = rule("a.txt", "pin.txt", Algorithm::Sha512, Format::Contains);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    #[test]
    fn glob_source_one_violation_per_offender() {
        // ok.txt is listed correctly; bad.txt is not in the
        // manifest at all → exactly one violation (on bad.txt).
        let ok_hash = Algorithm::Sha256.hex(b"ok");
        let manifest = format!("{ok_hash}  ok.txt\n");
        let (tmp, idx) = tempdir_with_files(&[
            ("ok.txt", b"ok"),
            ("bad.txt", b"bad"),
            ("SHA256SUMS", manifest.as_bytes()),
        ]);
        let r = rule("*.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        let v = r.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1, "{v:?}");
        assert_eq!(v[0].path.as_deref(), Some(Path::new("bad.txt")));
    }

    #[test]
    fn build_rejects_empty_source_and_fix_block() {
        let spec = crate::test_support::spec_yaml(
            "id: t\nkind: pair_hash\nsource: \"\"\ntarget: s.sum\nlevel: error\n",
        );
        assert!(
            build(&spec)
                .unwrap_err()
                .to_string()
                .contains("`source` must not be empty")
        );
        let spec = crate::test_support::spec_yaml(
            "id: t\nkind: pair_hash\nsource: a.txt\ntarget: s.sum\nlevel: error\n\
             fix:\n  file_remove: {}\n",
        );
        assert!(build(&spec).unwrap_err().to_string().contains("no fix op"));
    }

    #[test]
    fn build_rejects_empty_target() {
        let spec = crate::test_support::spec_yaml(
            "id: t\nkind: pair_hash\nsource: a.txt\ntarget: \"\"\nlevel: error\n",
        );
        assert!(
            build(&spec)
                .unwrap_err()
                .to_string()
                .contains("`target` (the file that must carry the digest) must not be empty")
        );
    }
}