Skip to main content

linprov_common/
siphash.rs

1//! SipHash-2-4 — the keyed PRF that replaces FNV for hashing paths into the
2//! allowlist / provenance record (finding #3).
3//!
4//! FNV is non-cryptographic and invertible: a local attacker can *construct*
5//! a path that hashes to an allowlisted path's value and bypass enforce mode.
6//! SipHash keyed with a per-install secret is a PRF — without the key, an
7//! attacker can't predict or collide outputs, even with a query oracle (a
8//! non-root user can read the `security.bpf.linprov.origin` xattr off a file
9//! they own, so oracle-resistance is required; that's why a lightweight
10//! algebraic keyed hash wouldn't do).
11//!
12//! Both the userspace daemon and the eBPF program must compute identical
13//! values. Userspace calls the one-shot [`siphash`]. The eBPF program can't
14//! (it needs a streaming walk that emits a hash at every `/` of a path in one
15//! pass), so it re-implements the same construction from [`sip_init`],
16//! [`sipround`], and the final block / finalization rules — the
17//! `streaming_walk_matches_one_shot` test below pins that streaming form to
18//! the one-shot, byte-for-byte, so the BPF port has an executable spec.
19
20/// SipHash internal state: four 64-bit words.
21pub type SipState = [u64; 4];
22
23/// SipHash initialization vector (the ASCII of "somepseudorandomlygenerated…").
24pub const SIP_IV: SipState = [
25    0x736f_6d65_7073_6575,
26    0x646f_7261_6e64_6f6d,
27    0x6c79_6765_6e65_7261,
28    0x7465_6462_7974_6573,
29];
30
31/// Compression rounds (the `2` in SipHash-2-4).
32pub const SIP_C: usize = 2;
33/// Finalization rounds (the `4` in SipHash-2-4).
34pub const SIP_D: usize = 4;
35
36/// Initialize the state from a 128-bit key `(k0, k1)`.
37#[inline(always)]
38pub fn sip_init(k0: u64, k1: u64) -> SipState {
39    [
40        SIP_IV[0] ^ k0,
41        SIP_IV[1] ^ k1,
42        SIP_IV[2] ^ k0,
43        SIP_IV[3] ^ k1,
44    ]
45}
46
47/// One SipRound — the shared core of both compression and finalization.
48#[inline(always)]
49pub fn sipround(v: &mut SipState) {
50    v[0] = v[0].wrapping_add(v[1]);
51    v[1] = v[1].rotate_left(13);
52    v[1] ^= v[0];
53    v[0] = v[0].rotate_left(32);
54    v[2] = v[2].wrapping_add(v[3]);
55    v[3] = v[3].rotate_left(16);
56    v[3] ^= v[2];
57    v[0] = v[0].wrapping_add(v[3]);
58    v[3] = v[3].rotate_left(21);
59    v[3] ^= v[0];
60    v[2] = v[2].wrapping_add(v[1]);
61    v[1] = v[1].rotate_left(17);
62    v[1] ^= v[2];
63    v[2] = v[2].rotate_left(32);
64}
65
66/// Absorb one full 8-byte message word `m` into the state (compression).
67#[inline(always)]
68pub fn sip_absorb(v: &mut SipState, m: u64) {
69    v[3] ^= m;
70    let mut i = 0;
71    while i < SIP_C {
72        sipround(v);
73        i += 1;
74    }
75    v[0] ^= m;
76}
77
78/// Finalize a copy of the state for a message of total length `len` whose
79/// trailing `< 8` bytes are packed (little-endian) in `tail`. Returns the
80/// 64-bit hash. The state is taken by value so callers can finalize a
81/// *clone* at each `/` while continuing to absorb into the original — that's
82/// how the eBPF walk emits an ancestor-prefix hash without restarting.
83#[inline(always)]
84pub fn sip_finish(mut v: SipState, len: usize, tail: u64) -> u64 {
85    let b = ((len as u64) << 56) | tail;
86    v[3] ^= b;
87    let mut i = 0;
88    while i < SIP_C {
89        sipround(&mut v);
90        i += 1;
91    }
92    v[0] ^= b;
93    v[2] ^= 0xff;
94    i = 0;
95    while i < SIP_D {
96        sipround(&mut v);
97        i += 1;
98    }
99    v[0] ^ v[1] ^ v[2] ^ v[3]
100}
101
102/// One-shot SipHash-2-4 of `data` under key `(k0, k1)`. Used by userspace;
103/// the eBPF path walk mirrors this construction (see the agreement test).
104#[cfg(feature = "user")]
105pub fn siphash(k0: u64, k1: u64, data: &[u8]) -> u64 {
106    let mut v = sip_init(k0, k1);
107    let len = data.len();
108    let mut i = 0;
109    while i + 8 <= len {
110        let m = u64::from_le_bytes([
111            data[i],
112            data[i + 1],
113            data[i + 2],
114            data[i + 3],
115            data[i + 4],
116            data[i + 5],
117            data[i + 6],
118            data[i + 7],
119        ]);
120        sip_absorb(&mut v, m);
121        i += 8;
122    }
123    let mut tail: u64 = 0;
124    let mut shift = 0;
125    while i < len {
126        tail |= (data[i] as u64) << shift;
127        shift += 8;
128        i += 1;
129    }
130    sip_finish(v, len, tail)
131}
132
133#[cfg(all(test, feature = "user"))]
134mod tests {
135    use super::*;
136
137    /// Test-side mirror of what the eBPF program will do: a single streaming
138    /// pass over a NUL-free path that, at each `/`, finalizes a *clone* of the
139    /// running state to get that prefix's hash, and tracks a separate
140    /// component state (reset after each `/`) for the basename. Returns
141    /// `(full, folder, basename, ancestors)`.
142    fn streaming_walk(k0: u64, k1: u64, path: &[u8]) -> (u64, u64, u64, Vec<u64>) {
143        let mut v = sip_init(k0, k1); // whole-path state
144        let mut full_len = 0usize;
145        let mut tail: u64 = 0; // pending < 8 bytes of the whole path
146        let mut tail_n = 0usize;
147
148        let mut c = sip_init(k0, k1); // current-component state
149        let mut comp_len = 0usize;
150        let mut comp_tail: u64 = 0;
151        let mut comp_n = 0usize;
152
153        let mut folder = 0u64;
154        let mut ancestors = Vec::new();
155
156        let absorb = |v: &mut SipState, tail: &mut u64, tn: &mut usize, len: &mut usize, b: u8| {
157            *tail |= (b as u64) << (8 * *tn);
158            *tn += 1;
159            *len += 1;
160            if *tn == 8 {
161                sip_absorb(v, *tail);
162                *tail = 0;
163                *tn = 0;
164            }
165        };
166
167        for &b in path {
168            absorb(&mut v, &mut tail, &mut tail_n, &mut full_len, b);
169            if b == b'/' {
170                // ancestor / folder = whole-path hash up to & including this '/'
171                folder = sip_finish(v, full_len, tail);
172                ancestors.push(folder);
173                // restart the component (basename) state after the slash
174                c = sip_init(k0, k1);
175                comp_tail = 0;
176                comp_n = 0;
177                comp_len = 0;
178            } else {
179                absorb(&mut c, &mut comp_tail, &mut comp_n, &mut comp_len, b);
180            }
181        }
182        let full = sip_finish(v, full_len, tail);
183        let basename = sip_finish(c, comp_len, comp_tail);
184        (full, folder, basename, ancestors)
185    }
186
187    #[test]
188    fn matches_published_2_4_vector() {
189        // Reference vector from the SipHash paper / reference implementation:
190        // key = bytes 00..0f, message = bytes 00..0e (15 bytes).
191        let k0 = 0x0706_0504_0302_0100;
192        let k1 = 0x0f0e_0d0c_0b0a_0908;
193        let msg: Vec<u8> = (0u8..15).collect();
194        assert_eq!(siphash(k0, k1, &msg), 0xa129_ca61_49be_45e5);
195    }
196
197    #[test]
198    fn distinct_inputs_and_keys_differ() {
199        assert_ne!(siphash(1, 2, b"/usr/bin/curl"), siphash(1, 2, b"/usr/bin/wget"));
200        // Same input, different key → different hash (the whole point of #3).
201        assert_ne!(siphash(1, 2, b"/usr/bin/curl"), siphash(9, 9, b"/usr/bin/curl"));
202    }
203
204    #[test]
205    fn streaming_walk_matches_one_shot() {
206        // The executable spec for the eBPF walk: every prefix/component hash
207        // the streaming pass emits must equal the one-shot SipHash of that
208        // exact substring. If this holds, a faithful BPF port agrees with
209        // userspace byte-for-byte.
210        let (k0, k1) = (0xdead_beef_0bad_f00d, 0x0123_4567_89ab_cdef);
211        for path in [
212            &b"/a/bb/ccc/exe"[..],
213            b"/",
214            b"x",
215            b"/home/u/Downloads/installer.sh",
216            b"no-slashes-here",
217            b"/exactly8/aligned/", // exercise 8-byte word boundaries
218        ] {
219            let (full, folder, basename, ancestors) = streaming_walk(k0, k1, path);
220            assert_eq!(full, siphash(k0, k1, path), "full {path:?}");
221
222            // collect the byte offsets of every '/'
223            let slashes: Vec<usize> = path
224                .iter()
225                .enumerate()
226                .filter(|(_, &b)| b == b'/')
227                .map(|(i, _)| i)
228                .collect();
229            assert_eq!(ancestors.len(), slashes.len(), "ancestor count {path:?}");
230            for (k, &end) in slashes.iter().enumerate() {
231                assert_eq!(ancestors[k], siphash(k0, k1, &path[..=end]), "anc {k} {path:?}");
232            }
233            if let Some(&last) = slashes.last() {
234                assert_eq!(folder, siphash(k0, k1, &path[..=last]), "folder {path:?}");
235                assert_eq!(basename, siphash(k0, k1, &path[last + 1..]), "base {path:?}");
236            } else {
237                // no slash: basename is the whole thing, folder never set
238                assert_eq!(basename, siphash(k0, k1, path), "base-noslash {path:?}");
239            }
240        }
241    }
242}