1use std::path::Path;
23
24use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
25use serde::Deserialize;
26use sha2::{Digest, Sha256, Sha512};
27
/// Hash function used to digest each source file.
///
/// Deserialized from the rule options in lowercase (`sha256`, `sha512`).
#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum Algorithm {
    /// SHA-256; the default when the `algorithm` option is omitted.
    #[default]
    Sha256,
    /// SHA-512.
    Sha512,
}
35
36impl Algorithm {
37 fn hex(self, bytes: &[u8]) -> String {
39 match self {
40 Self::Sha256 => encode_hex(Sha256::digest(bytes).as_slice()),
41 Self::Sha512 => encode_hex(Sha512::digest(bytes).as_slice()),
42 }
43 }
44
45 fn label(self) -> &'static str {
46 match self {
47 Self::Sha256 => "sha256",
48 Self::Sha512 => "sha512",
49 }
50 }
51}
52
/// How the digest is expected to appear inside the target file.
///
/// Deserialized from the rule options in kebab-case (`contains`,
/// `sums-line`).
#[derive(Debug, Clone, Copy, Deserialize, Default, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
enum Format {
    /// The hex digest may appear anywhere in the target text; the match is
    /// ASCII case-insensitive. This is the default.
    #[default]
    Contains,
    /// The target must have a `sha256sum`-style line (`<hex> <path>`) that
    /// names the source path and carries its digest.
    SumsLine,
}
63
/// User-supplied options for a `pair_hash` rule, as deserialized by `build`.
///
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// Pattern selecting the files whose digests must be recorded; `build`
    /// turns it into a `Scope`. Must not be blank.
    source: String,
    /// Path of the file that must carry the digests; `evaluate` joins it
    /// onto the context root. Must not be blank.
    target: String,
    /// Hash algorithm; defaults to SHA-256 when omitted.
    #[serde(default)]
    algorithm: Algorithm,
    /// How the digest must appear in the target; defaults to `contains`.
    #[serde(default)]
    format: Format,
}
76
/// Rule requiring that the digest of every file matched by `source_scope`
/// is recorded in the `target` file.
///
/// Constructed by `build` from a `RuleSpec`.
#[derive(Debug)]
pub struct PairHashRule {
    // `id`, `level` and `policy_url` are copied from the spec; presumably
    // they are read by `rule_common_impl!` — confirm against that macro.
    id: String,
    level: Level,
    policy_url: Option<String>,
    /// Optional user message; when set it replaces the generated text of
    /// mismatch / missing-target violations.
    message: Option<String>,
    /// Scope built from the `source` option; selects the files to hash.
    source_scope: Scope,
    /// Path (joined onto the context root) of the file that must carry the
    /// digests.
    target: String,
    /// Hash algorithm applied to each source file.
    algorithm: Algorithm,
    /// How the digest must appear in the target file.
    format: Format,
}
88
89impl Rule for PairHashRule {
90 alint_core::rule_common_impl!();
91
92 fn requires_full_index(&self) -> bool {
93 true
98 }
99
100 fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
101 let target_path = Path::new(&self.target);
102 let b_bytes = match crate::io::read_capped(&ctx.root.join(target_path)) {
103 Ok(b) => b,
104 Err(crate::io::ReadCapError::TooLarge(n)) => {
105 return Ok(vec![
106 Violation::new(format!(
107 "pair_hash target {:?} is too large to analyze \
108 ({n} bytes; 256 MiB cap)",
109 self.target
110 ))
111 .with_path(std::sync::Arc::<Path>::from(target_path)),
112 ]);
113 }
114 Err(crate::io::ReadCapError::Io(_)) => {
115 let msg = self.message.clone().unwrap_or_else(|| {
116 format!(
117 "pair_hash target {:?} does not exist or is unreadable",
118 self.target
119 )
120 });
121 return Ok(vec![
122 Violation::new(msg).with_path(std::sync::Arc::<Path>::from(target_path)),
123 ]);
124 }
125 };
126 let b_text = String::from_utf8_lossy(&b_bytes);
127 let b_lower = b_text.to_ascii_lowercase();
128
129 let mut violations = Vec::new();
130 for entry in ctx.index.files() {
131 if !self.source_scope.matches(&entry.path, ctx.index) {
132 continue;
133 }
134 let a_bytes = match crate::io::read_capped(&ctx.root.join(&entry.path)) {
135 Ok(b) => b,
136 Err(crate::io::ReadCapError::TooLarge(n)) => {
137 violations.push(
138 Violation::new(format!(
139 "{} is too large to hash ({n} bytes; 256 MiB cap)",
140 entry.path.display()
141 ))
142 .with_path(entry.path.clone()),
143 );
144 continue;
145 }
146 Err(crate::io::ReadCapError::Io(_)) => continue,
148 };
149 let digest = self.algorithm.hex(&a_bytes);
150 if let Some(desc) = self.check(&entry.path, &digest, &b_text, &b_lower) {
151 let msg = self.message.clone().unwrap_or(desc);
152 violations.push(Violation::new(msg).with_path(entry.path.clone()));
153 }
154 }
155 Ok(violations)
156 }
157}
158
159impl PairHashRule {
160 fn check(&self, src: &Path, digest: &str, b: &str, b_lower: &str) -> Option<String> {
163 match self.format {
164 Format::Contains => {
165 if b_lower.contains(digest) {
166 return None;
167 }
168 Some(format!(
169 "{} of {} ({digest}) not found in {}",
170 self.algorithm.label(),
171 src.display(),
172 self.target,
173 ))
174 }
175 Format::SumsLine => {
176 let want = src.to_string_lossy();
177 for line in b.lines() {
178 let mut tok = line.split_whitespace();
179 let (Some(hex), Some(path_tok)) = (tok.next(), tok.next()) else {
180 continue;
181 };
182 let path_tok = path_tok.strip_prefix('*').unwrap_or(path_tok);
191 let path_tok = path_tok.strip_prefix("./").unwrap_or(path_tok);
192 if path_tok != want {
193 continue;
194 }
195 return if hex.eq_ignore_ascii_case(digest) {
196 None
197 } else {
198 Some(format!(
199 "{} digest mismatch for {} in {}: manifest has {hex}, \
200 file hashes to {digest}",
201 self.algorithm.label(),
202 src.display(),
203 self.target,
204 ))
205 };
206 }
207 Some(format!(
208 "{} is not listed in manifest {}",
209 src.display(),
210 self.target,
211 ))
212 }
213 }
214 }
215}
216
/// Renders `bytes` as lowercase hexadecimal, two digits per byte.
fn encode_hex(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
227
228pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
229 alint_core::reject_scope_filter_on_cross_file(spec, "pair_hash")?;
230 let opts: Options = spec
231 .deserialize_options()
232 .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
233 if opts.source.trim().is_empty() {
234 return Err(Error::rule_config(
235 &spec.id,
236 "pair_hash `source` must not be empty",
237 ));
238 }
239 if opts.target.trim().is_empty() {
240 return Err(Error::rule_config(
241 &spec.id,
242 "pair_hash `target` (the file that must carry the digest) must not be empty",
243 ));
244 }
245 if spec.fix.is_some() {
246 return Err(Error::rule_config(
247 &spec.id,
248 "pair_hash has no fix op — regenerating a checksum manifest is the \
249 manifest generator's job, not alint's",
250 ));
251 }
252 let source_scope = Scope::from_patterns(std::slice::from_ref(&opts.source))?;
253 Ok(Box::new(PairHashRule {
254 id: spec.id.clone(),
255 level: spec.level,
256 policy_url: spec.policy_url.clone(),
257 message: spec.message.clone(),
258 source_scope,
259 target: opts.target,
260 algorithm: opts.algorithm,
261 format: opts.format,
262 }))
263}
264
/// Unit tests for the `pair_hash` rule: digest encoding, both manifest
/// formats, missing-target handling, glob sources and `build` validation.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{ctx, tempdir_with_files};

    /// SHA-256 of the ASCII bytes `hello`, in lowercase hex.
    const HELLO_SHA256: &str = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824";

    /// Builds a rule directly (bypassing `build`) with error level, no
    /// policy URL and no custom message.
    fn rule(source: &str, target: &str, algorithm: Algorithm, format: Format) -> PairHashRule {
        PairHashRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            source_scope: Scope::from_patterns(&[source.to_string()]).unwrap(),
            target: target.into(),
            algorithm,
            format,
        }
    }

    /// `Algorithm::hex` reproduces the known SHA-256 vector for `hello`.
    #[test]
    fn sha256_known_vector() {
        assert_eq!(Algorithm::Sha256.hex(b"hello"), HELLO_SHA256);
    }

    /// `contains`: a digest embedded anywhere in the target satisfies the rule.
    #[test]
    fn contains_passes_when_digest_embedded() {
        let manifest = format!("// pinned\nHASH = {HELLO_SHA256}\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("pin.txt", manifest.as_bytes())]);
        let r = rule("a.txt", "pin.txt", Algorithm::Sha256, Format::Contains);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    /// `contains`: a target without the digest yields one violation on the
    /// source file.
    #[test]
    fn contains_fires_when_digest_absent() {
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("pin.txt", b"nothing relevant here\n")]);
        let r = rule("a.txt", "pin.txt", Algorithm::Sha256, Format::Contains);
        let v = r.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("a.txt")));
        assert!(v[0].message.contains("not found in"));
    }

    /// `contains`: an uppercase digest in the target still matches.
    #[test]
    fn contains_is_case_insensitive() {
        let manifest = format!("HASH={}\n", HELLO_SHA256.to_ascii_uppercase());
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("pin.txt", manifest.as_bytes())]);
        let r = rule("a.txt", "pin.txt", Algorithm::Sha256, Format::Contains);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    /// `sums-line`: a `<hex> <path>` line with the right digest passes.
    #[test]
    fn sums_line_passes_on_matching_line() {
        let manifest = format!("{HELLO_SHA256} a.txt\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("SHA256SUMS", manifest.as_bytes())]);
        let r = rule("a.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    /// `sums-line`: a leading `*` (binary-mode marker) on the path is ignored.
    #[test]
    fn sums_line_tolerates_binary_marker() {
        let manifest = format!("{HELLO_SHA256} *a.txt\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("SHA256SUMS", manifest.as_bytes())]);
        let r = rule("a.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    /// `sums-line`: a leading `./` on the manifest path is ignored.
    #[test]
    fn sums_line_tolerates_dot_slash_prefix() {
        let manifest = format!("{HELLO_SHA256} ./a.txt\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("SHA256SUMS", manifest.as_bytes())]);
        let r = rule("a.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        assert!(
            r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty(),
            "a ./-prefixed sums-line path must match the index path"
        );
    }

    /// `sums-line` works with SHA-512 digests as well.
    #[test]
    fn sha512_sums_line_round_trips() {
        let digest = Algorithm::Sha512.hex(b"hello");
        let manifest = format!("{digest} a.txt\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("SHA512SUMS", manifest.as_bytes())]);
        let r = rule("a.txt", "SHA512SUMS", Algorithm::Sha512, Format::SumsLine);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    /// `sums-line`: a listed path with the wrong digest reports a mismatch.
    #[test]
    fn sums_line_fires_on_wrong_hash() {
        let bad = "0".repeat(64);
        let manifest = format!("{bad} a.txt\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("SHA256SUMS", manifest.as_bytes())]);
        let r = rule("a.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        let v = r.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("digest mismatch"));
    }

    /// `sums-line`: a source that no manifest line names is reported as
    /// not listed.
    #[test]
    fn sums_line_fires_when_path_not_listed() {
        let (tmp, idx) = tempdir_with_files(&[
            ("a.txt", b"hello"),
            ("SHA256SUMS", b"deadbeef other.txt\n"),
        ]);
        let r = rule("a.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        let v = r.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains("not listed in manifest"));
    }

    /// A missing target file yields exactly one violation, attached to the
    /// target path rather than to any source.
    #[test]
    fn missing_in_is_one_violation_on_in() {
        let (tmp, idx) = tempdir_with_files(&[("a.txt", b"hello")]);
        let r = rule("a.txt", "nope.sum", Algorithm::Sha256, Format::Contains);
        let v = r.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].path.as_deref(), Some(Path::new("nope.sum")));
        assert!(v[0].message.contains("does not exist"));
    }

    /// `contains` works with SHA-512 digests as well.
    #[test]
    fn sha512_contains_round_trips() {
        let digest = Algorithm::Sha512.hex(b"hello");
        let manifest = format!("sha512 = {digest}\n");
        let (tmp, idx) =
            tempdir_with_files(&[("a.txt", b"hello"), ("pin.txt", manifest.as_bytes())]);
        let r = rule("a.txt", "pin.txt", Algorithm::Sha512, Format::Contains);
        assert!(r.evaluate(&ctx(tmp.path(), &idx)).unwrap().is_empty());
    }

    /// A glob source produces one violation per offending file only: the
    /// listed `ok.txt` passes, the unlisted `bad.txt` is reported.
    #[test]
    fn glob_source_one_violation_per_offender() {
        let ok_hash = Algorithm::Sha256.hex(b"ok");
        let manifest = format!("{ok_hash} ok.txt\n");
        let (tmp, idx) = tempdir_with_files(&[
            ("ok.txt", b"ok"),
            ("bad.txt", b"bad"),
            ("SHA256SUMS", manifest.as_bytes()),
        ]);
        let r = rule("*.txt", "SHA256SUMS", Algorithm::Sha256, Format::SumsLine);
        let v = r.evaluate(&ctx(tmp.path(), &idx)).unwrap();
        assert_eq!(v.len(), 1, "{v:?}");
        assert_eq!(v[0].path.as_deref(), Some(Path::new("bad.txt")));
    }

    /// `build` rejects a blank `source` and rejects any `fix` block.
    #[test]
    fn build_rejects_empty_source_and_fix_block() {
        let spec = crate::test_support::spec_yaml(
            "id: t\nkind: pair_hash\nsource: \"\"\ntarget: s.sum\nlevel: error\n",
        );
        assert!(
            build(&spec)
                .unwrap_err()
                .to_string()
                .contains("`source` must not be empty")
        );
        let spec = crate::test_support::spec_yaml(
            "id: t\nkind: pair_hash\nsource: a.txt\ntarget: s.sum\nlevel: error\n\
             fix:\n file_remove: {}\n",
        );
        assert!(build(&spec).unwrap_err().to_string().contains("no fix op"));
    }
}