// fallow_cli/report/ci/fingerprint.rs

/// Fingerprint key used in SARIF `partialFingerprints` and other CI formats.
pub const FINGERPRINT_KEY: &str = "tools.fallow.fingerprint/v1";
3
/// Conventional SARIF key consumed by GitHub Code Scanning's alert-correlation
/// engine. Emitted in addition to `FINGERPRINT_KEY` so GHAS deduplicates fallow
/// alerts across pushes.
pub const GHAS_FINGERPRINT_KEY: &str = "primaryLocationLineHash/v1";
8
/// Normalize a source snippet so whitespace-only edits do not change it.
///
/// Each line is trimmed of leading and trailing whitespace, lines that are
/// empty after trimming are dropped, and the survivors are joined with a
/// single `\n`. Whitespace *inside* a line is left untouched.
///
/// Returns an empty string when `snippet` is empty or contains only
/// whitespace.
#[must_use]
pub fn normalize_snippet(snippet: &str) -> String {
    snippet
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}
18
/// Compute a deterministic fingerprint hash from key fields.
///
/// Uses FNV-1a (64-bit) for guaranteed cross-version stability.
/// `DefaultHasher` is explicitly not specified across Rust versions.
///
/// The bytes of every part are folded into the hash in order, and each part
/// is followed by a `0xff` separator byte so that the split between parts
/// influences the result.
///
/// Returns the 64-bit hash as 16 zero-padded lowercase hex digits. An empty
/// `parts` slice yields the FNV offset basis itself.
#[must_use]
pub fn fingerprint_hash(parts: &[&str]) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;
    // `0xff` never occurs in valid UTF-8, so it cannot be confused with a
    // byte coming from one of the `&str` parts.
    const PART_SEPARATOR: u64 = 0xff;

    let mut hash = FNV_OFFSET_BASIS;
    for part in parts {
        for byte in part.bytes() {
            // FNV-1a order: XOR the byte in first, then multiply by the prime.
            hash ^= u64::from(byte);
            hash = hash.wrapping_mul(FNV_PRIME);
        }
        // Separator between parts to avoid "ab"+"c" == "a"+"bc"
        hash ^= PART_SEPARATOR;
        hash = hash.wrapping_mul(FNV_PRIME);
    }
    format!("{hash:016x}")
}
37
38#[must_use]
39pub fn finding_fingerprint(rule_id: &str, path: &str, snippet: &str) -> String {
40    let normalized = normalize_snippet(snippet);
41    fingerprint_hash(&[rule_id, path, &normalized])
42}
43
44/// Stable fingerprint for the review envelope's top-level summary block
45/// (issue #528 / v2). Hashes the rendered summary body so consumers can
46/// reconcile a single sticky PR/MR summary comment by fingerprint match
47/// without invoking fallow twice. Stable across runs that produce the same
48/// summary content; the hash shifts when finding counts or section headers
49/// change, so consumers detect content change cheaply.
50#[must_use]
51pub fn summary_fingerprint(body: &str) -> String {
52    fingerprint_hash(&[body])
53}
54
55/// Composite fingerprint for v2 same-line merged comments (issue #528).
56/// Hashes the sorted list of constituent per-finding fingerprints (joined
57/// by `:`) and prefixes the resulting 16-char FNV-1a hash with `merged:`
58/// so consumers can discriminate the merged shape from a single-finding
59/// fingerprint by string inspection. The hash changes when constituent
60/// findings change membership across runs; the bundled wrappers
61/// (`action/scripts/review.sh`, `ci/scripts/review.sh`) and
62/// `fallow ci reconcile-review` consume only the primary fingerprint, so
63/// content-change yielding a new fingerprint cleanly re-posts on the next
64/// run rather than silently keeping a stale body. External consumers that
65/// want update-in-place reconciliation implement their own identity
66/// tracking via `marker_regex`.
67#[must_use]
68pub fn composite_fingerprint(constituents: &[&str]) -> String {
69    let mut sorted: Vec<&str> = constituents.to_vec();
70    sorted.sort_unstable();
71    let joined = sorted.join(":");
72    format!("merged:{}", fingerprint_hash(&[joined.as_str()]))
73}
74
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fingerprint_is_stable_for_whitespace_only_snippet_changes() {
        let a = finding_fingerprint(
            "fallow/unused-export",
            "src/a.ts",
            "  export const x = 1;  ",
        );
        let b = finding_fingerprint(
            "fallow/unused-export",
            "src/a.ts",
            "\nexport const x = 1;\n",
        );
        assert_eq!(a, b);
    }

    #[test]
    fn fingerprint_parts_are_separated() {
        assert_ne!(
            fingerprint_hash(&["ab", "c"]),
            fingerprint_hash(&["a", "bc"])
        );
    }

    #[test]
    fn fingerprint_hash_of_no_parts_is_fnv_offset_basis() {
        // Golden value: with nothing to fold in, the output is the 64-bit
        // FNV offset basis rendered as 16 lowercase hex digits. Pins the
        // seed and the output format against accidental change.
        assert_eq!(fingerprint_hash(&[]), "cbf29ce484222325");
    }

    #[test]
    fn composite_fingerprint_shifts_when_constituents_change() {
        // Hash incorporates the sorted constituent fingerprints, so adding
        // or removing one shifts the merged identity. Idempotent on equal
        // input regardless of insertion order (sort stabilises it). The
        // wire shape is `merged:<16-char hex>` so consumers can detect
        // compositeness by prefix without re-hashing.
        let three = composite_fingerprint(&["fp_a", "fp_b", "fp_c"]);
        let drop_b = composite_fingerprint(&["fp_a", "fp_c"]);
        let reordered = composite_fingerprint(&["fp_c", "fp_a", "fp_b"]);
        assert_ne!(three, drop_b);
        assert_eq!(three, reordered);
        assert!(three.starts_with("merged:"));
        // 7 chars prefix + 16 hex = 23 total.
        assert_eq!(three.len(), 23);
    }

    #[test]
    fn summary_fingerprint_shifts_when_body_changes() {
        let a = summary_fingerprint("### Fallow check\n\n0 findings");
        let b = summary_fingerprint("### Fallow check\n\n1 finding");
        assert_ne!(a, b);
        // Idempotent.
        assert_eq!(a, summary_fingerprint("### Fallow check\n\n0 findings"));
        // 16 hex chars, no prefix.
        assert_eq!(a.len(), 16);
    }
}