Skip to main content

s4_server/
audit_log.rs

1//! Tamper-evident audit-log HMAC chain (v0.5 #31).
2//!
3//! Extends the v0.4 #20 S3-style access log emitter with a
4//! hash-linked HMAC-SHA256 column appended to every line. Each line's
5//! HMAC is computed over the previous line's HMAC bytes concatenated
6//! with the current line's text (excluding the HMAC field itself):
7//!
8//! ```text
9//! hmac_n = HMAC-SHA256(key, hmac_{n-1} || line_n_without_hmac)
10//! ```
11//!
12//! The genesis HMAC seed is `SHA256("S4-AUDIT-V1")` — a fixed,
13//! publicly-known constant that anchors the chain at a deterministic
14//! starting point so verifiers don't need to trust the producer about
15//! "where the chain started".
16//!
17//! ## File rotation
18//!
19//! When the access-log flusher rolls over to a new file (hourly +
20//! batch-counter), the new file starts with a comment line:
21//!
22//! ```text
23//! # prev_file_tail=<hex-encoded last_hmac of the previous file>
24//! ```
25//!
26//! The first real entry in the new file uses that tail as its
27//! `prev_hmac`, so the chain extends across rotations. A verifier can
28//! optionally walk multiple files in chronological order to confirm
29//! the cross-file linkage.
30//!
31//! ## Wire format per entry
32//!
33//! ```text
34//! <existing S3-style access-log line> <hex hmac (64 chars)>\n
35//! ```
36//!
37//! A single trailing space then 64 lowercase hex chars. Existing
38//! parsers that split on whitespace see one extra column.
39//!
40//! ## Key loader
41//!
42//! `AuditHmacKey::from_str("raw:32-byte-string")`,
43//! `"hex:0123...64-char"`, or `"base64:..."` — same shape as
44//! `SseKey::from_str` (see `sse.rs`). For very small ops setups, the
45//! `raw:` prefix lets you stash the key directly in a CLI flag /
46//! systemd unit env var; production should prefer `hex:` or `base64:`
47//! delivered out-of-band.
48//!
49//! ## Verifier CLI
50//!
51//! `s4 verify-audit-log <FILE> --hmac-key <SPEC>` walks the file,
52//! recomputes each line's expected HMAC, and reports the first chain
53//! break (if any). Returns `VerifyReport { total_lines, ok_lines,
54//! first_break }`. Comment lines (`# prev_file_tail=...`) are honoured
55//! as the genesis-prev for the first real entry.
56//!
57//! ## Limitations (deliberate, v0.5 scope)
58//!
59//! - Single key, no key rotation — a follow-up issue tracks a key-id
60//!   field per line.
61//! - In-memory chain state only — if the process restarts mid-hour,
62//!   the new flusher loads no state and writes a fresh genesis line at
63//!   the top of the next batch file. Verifier handles this by treating
64//!   missing `# prev_file_tail=` as "this batch is its own chain".
65//! - Verifier only walks one file at a time; cross-file walk is the
66//!   operator's responsibility (sort by name, feed one-by-one).
67
68use std::path::Path;
69use std::str::FromStr;
70use std::sync::Arc;
71
72use hmac::{Hmac, Mac};
73use sha2::{Digest, Sha256};
74use thiserror::Error;
75
/// Label that is hashed to derive the genesis seed,
/// `SHA256("S4-AUDIT-V1")`. Only the label is a const; the seed itself
/// is produced by `genesis_prev()` on each call because Sha256 is not
/// const-fn yet. Changing the label changes every chain's starting
/// point.
pub const GENESIS_LABEL: &[u8] = b"S4-AUDIT-V1";

/// Hex-encoded HMAC field length in characters (SHA-256 → 32 bytes →
/// 64 hex chars).
pub const HMAC_HEX_LEN: usize = 64;

/// Comment prefix used to carry the previous file's last HMAC across a
/// rotation boundary.
pub const PREV_TAIL_COMMENT_PREFIX: &str = "# prev_file_tail=";

/// HMAC-SHA256 instantiation used for every chain step.
type HmacSha256 = Hmac<Sha256>;
90
/// HMAC-SHA256 key material. The length is not fixed by the type; the
/// `FromStr` parser enforces a minimum of 16 bytes. The bytes live
/// inside an `Arc`, so `clone()` bumps a refcount instead of copying
/// the key, which keeps sharing between the access-log flusher and
/// any verifier callers cheap.
#[derive(Clone)]
pub struct AuditHmacKey(Arc<Vec<u8>>);
95
/// Reasons an audit-log HMAC key spec can be rejected while parsing.
#[derive(Debug, Error)]
pub enum AuditKeyError {
    /// The spec did not start with one of the recognised prefixes
    /// (`raw:`, `hex:`, `base64:`).
    #[error(
        "audit-log HMAC key spec must start with `raw:`, `hex:`, or `base64:` (got: {0:?})"
    )]
    BadPrefix(String),
    /// The `hex:` payload had odd length or a non-hex character.
    #[error("audit-log HMAC key hex must be even-length and all-hex; got {0}")]
    BadHex(String),
    /// The `base64:` payload could not be decoded; carries the
    /// decoder's error text.
    #[error("audit-log HMAC key base64 decode failed: {0}")]
    BadBase64(String),
    /// The decoded key was shorter than 16 bytes; carries the decoded
    /// length in bytes.
    #[error("audit-log HMAC key must be at least 16 bytes after decode (got {0})")]
    TooShort(usize),
}
109
110impl AuditHmacKey {
111    /// Parse a key from a CLI-style spec. Three forms:
112    ///
113    /// - `raw:<utf8 bytes>` — the bytes after the prefix are the key
114    ///   verbatim. Useful for tests and small ops; production should
115    ///   prefer `hex:` or `base64:`.
116    /// - `hex:<hex chars>` — even-length, all-hex.
117    /// - `base64:<base64 chars>` — standard base64, padding optional.
118    ///
119    /// Minimum decoded length: 16 bytes (128 bits). HMAC-SHA256 itself
120    /// permits any key length, but anything <16 bytes is operator
121    /// error rather than a sound choice.
122    pub fn as_bytes(&self) -> &[u8] {
123        &self.0
124    }
125}
126
127impl FromStr for AuditHmacKey {
128    type Err = AuditKeyError;
129
130    fn from_str(spec: &str) -> Result<Self, Self::Err> {
131        let bytes = if let Some(s) = spec.strip_prefix("raw:") {
132            s.as_bytes().to_vec()
133        } else if let Some(s) = spec.strip_prefix("hex:") {
134            if !s.len().is_multiple_of(2) || !s.chars().all(|c| c.is_ascii_hexdigit()) {
135                return Err(AuditKeyError::BadHex(s.to_owned()));
136            }
137            let mut out = Vec::with_capacity(s.len() / 2);
138            for i in (0..s.len()).step_by(2) {
139                out.push(
140                    u8::from_str_radix(&s[i..i + 2], 16)
141                        .map_err(|_| AuditKeyError::BadHex(s.to_owned()))?,
142                );
143            }
144            out
145        } else if let Some(s) = spec.strip_prefix("base64:") {
146            base64::Engine::decode(&base64::engine::general_purpose::STANDARD, s.as_bytes())
147                .map_err(|e| AuditKeyError::BadBase64(e.to_string()))?
148        } else {
149            return Err(AuditKeyError::BadPrefix(spec.to_owned()));
150        };
151        if bytes.len() < 16 {
152            return Err(AuditKeyError::TooShort(bytes.len()));
153        }
154        Ok(Self(Arc::new(bytes)))
155    }
156}
157
158impl std::fmt::Debug for AuditHmacKey {
159    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
160        f.debug_struct("AuditHmacKey")
161            .field("len", &self.0.len())
162            .field("key", &"<redacted>")
163            .finish()
164    }
165}
166
/// An [`AuditHmacKey`] behind an `Arc`.
pub type SharedAuditHmacKey = Arc<AuditHmacKey>;
168
169/// Compute the genesis seed: `SHA256("S4-AUDIT-V1")`. Used as the
170/// `prev_hmac` for the very first line in a chain (when no previous
171/// file's tail is available).
172pub fn genesis_prev() -> [u8; 32] {
173    let mut h = Sha256::new();
174    h.update(GENESIS_LABEL);
175    let out = h.finalize();
176    let mut buf = [0u8; 32];
177    buf.copy_from_slice(&out);
178    buf
179}
180
181/// Compute one chain step. Input: previous HMAC bytes + the line text
182/// without its HMAC suffix (and without the trailing newline).
183/// Output: 32-byte HMAC-SHA256.
184pub fn chain_step(key: &AuditHmacKey, prev_hmac: &[u8], line_no_hmac: &[u8]) -> [u8; 32] {
185    let mut mac = HmacSha256::new_from_slice(key.as_bytes())
186        .expect("HMAC-SHA256 accepts any key length");
187    mac.update(prev_hmac);
188    mac.update(line_no_hmac);
189    let out = mac.finalize().into_bytes();
190    let mut buf = [0u8; 32];
191    buf.copy_from_slice(&out);
192    buf
193}
194
/// Render `bytes` as lowercase hex, two characters per byte, with no
/// separators or prefix.
pub fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let mut hex = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        hex.push(char::from(DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}
203
/// Decode a hex string (upper- or lowercase digits) back to bytes.
///
/// Returns `None` when the input has odd length or contains anything
/// other than ASCII hex digits — including sign characters and
/// non-ASCII text. The empty string decodes to an empty vector.
pub fn hex_decode(s: &str) -> Option<Vec<u8>> {
    /// Value of one ASCII hex digit, or `None` for any other byte.
    fn nibble(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => None,
        }
    }

    // Walk the raw bytes rather than slicing the `str` two bytes at a
    // time: `&s[i..i + 2]` panics when a multi-byte UTF-8 character
    // straddles the cut, and `u8::from_str_radix` would accept a
    // leading `+`, letting non-hex input through.
    let pairs = s.as_bytes().chunks_exact(2);
    if !pairs.remainder().is_empty() {
        return None;
    }
    pairs
        .map(|pair| Some((nibble(pair[0])? << 4) | nibble(pair[1])?))
        .collect()
}
216
/// Result of `verify_audit_log`. `first_break` is `None` when the
/// chain is intact end-to-end.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VerifyReport {
    /// Number of lines visited, counting blank and comment lines too.
    /// When verification stops at a break this includes the breaking
    /// line but nothing after it.
    pub total_lines: u64,
    /// Number of entries whose trailing HMAC matched the recomputed
    /// value before verification finished or hit the first break.
    pub ok_lines: u64,
    /// Details of the first entry that failed verification, if any.
    pub first_break: Option<VerifyBreak>,
}
225
/// Location and evidence for the first entry that failed chain
/// verification.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VerifyBreak {
    /// 1-indexed line number within the file (counting all lines,
    /// including comment lines).
    pub line_no: u64,
    /// Hex-encoded HMAC the verifier computed. When the line has no
    /// HMAC column, this is computed over the entire line.
    pub expected_hmac: String,
    /// Hex-encoded HMAC the verifier read off the line (or "<missing>"
    /// if the trailing column wasn't present at all).
    pub actual_hmac: String,
}
237
/// Failures that stop verification before a report can be produced.
/// A chain mismatch is NOT an error — it is reported through
/// `VerifyReport::first_break`.
#[derive(Debug, Error)]
pub enum VerifyError {
    /// The file could not be read, or its contents were not valid
    /// UTF-8 (surfaced as an `InvalidData` I/O error).
    #[error("audit-log file {path:?}: {source}")]
    Io {
        path: std::path::PathBuf,
        source: std::io::Error,
    },
    /// A `# prev_file_tail=` comment carried a value that was not
    /// valid hex or did not decode to exactly 32 bytes. `value` is the
    /// trimmed text found after the prefix.
    #[error("audit-log file {path:?}: prev_file_tail comment had non-hex value: {value:?}")]
    BadPrevTail {
        path: std::path::PathBuf,
        value: String,
    },
}
251
252/// Walk an audit-log file, recomputing each line's HMAC and comparing
253/// against the trailing column. Stops at the first break and reports
254/// it (subsequent lines are NOT counted as `ok_lines` — they may all
255/// be valid, just not chain-linked from where the break is).
256///
257/// Comment lines (lines starting with `#`) are honoured — specifically
258/// `# prev_file_tail=<hex>` resets the running `prev_hmac` to that
259/// value before the next non-comment line. Other comment lines are
260/// counted but not chain-checked.
261///
262/// Empty / whitespace-only lines are skipped (counted but neither
263/// chain-checked nor flagged).
264pub fn verify_audit_log(path: &Path, key: &AuditHmacKey) -> Result<VerifyReport, VerifyError> {
265    let raw = std::fs::read(path).map_err(|source| VerifyError::Io {
266        path: path.to_path_buf(),
267        source,
268    })?;
269    verify_audit_bytes(path, &raw, key)
270}
271
272/// Same as `verify_audit_log` but takes the in-memory bytes directly.
273/// Used by the unit tests; the file-path version delegates here after
274/// reading.
275pub fn verify_audit_bytes(
276    path: &Path,
277    bytes: &[u8],
278    key: &AuditHmacKey,
279) -> Result<VerifyReport, VerifyError> {
280    let text = std::str::from_utf8(bytes).map_err(|e| VerifyError::Io {
281        path: path.to_path_buf(),
282        source: std::io::Error::new(std::io::ErrorKind::InvalidData, e),
283    })?;
284
285    let mut prev_hmac: [u8; 32] = genesis_prev();
286    let mut have_explicit_prev = false;
287    let mut total: u64 = 0;
288    let mut ok: u64 = 0;
289
290    for (idx, raw_line) in text.split_inclusive('\n').enumerate() {
291        total += 1;
292        let line_no = (idx + 1) as u64;
293        // Strip the trailing newline (and CR, defensively) for
294        // chain-step input. We do NOT trim leading whitespace because
295        // the access log format starts with `-` deliberately.
296        let line = raw_line.trim_end_matches('\n').trim_end_matches('\r');
297        if line.trim().is_empty() {
298            continue;
299        }
300        if let Some(rest) = line.strip_prefix(PREV_TAIL_COMMENT_PREFIX) {
301            let hex = rest.trim();
302            let bytes = hex_decode(hex).ok_or_else(|| VerifyError::BadPrevTail {
303                path: path.to_path_buf(),
304                value: hex.to_owned(),
305            })?;
306            if bytes.len() != 32 {
307                return Err(VerifyError::BadPrevTail {
308                    path: path.to_path_buf(),
309                    value: hex.to_owned(),
310                });
311            }
312            prev_hmac.copy_from_slice(&bytes);
313            have_explicit_prev = true;
314            continue;
315        }
316        if line.starts_with('#') {
317            // other comment — skip but count.
318            continue;
319        }
320        // Split off the trailing HMAC column.
321        let (line_no_hmac, actual_hex) = match split_hmac_suffix(line) {
322            Some((body, hmac_hex)) => (body, hmac_hex),
323            None => {
324                return Ok(VerifyReport {
325                    total_lines: total,
326                    ok_lines: ok,
327                    first_break: Some(VerifyBreak {
328                        line_no,
329                        expected_hmac: hex_encode(&chain_step(key, &prev_hmac, line.as_bytes())),
330                        actual_hmac: "<missing>".to_owned(),
331                    }),
332                });
333            }
334        };
335        let expected = chain_step(key, &prev_hmac, line_no_hmac.as_bytes());
336        let expected_hex = hex_encode(&expected);
337        if expected_hex == actual_hex {
338            ok += 1;
339            prev_hmac = expected;
340            have_explicit_prev = true;
341        } else {
342            return Ok(VerifyReport {
343                total_lines: total,
344                ok_lines: ok,
345                first_break: Some(VerifyBreak {
346                    line_no,
347                    expected_hmac: expected_hex,
348                    actual_hmac: actual_hex.to_owned(),
349                }),
350            });
351        }
352    }
353    let _ = have_explicit_prev; // reserved for future cross-file walk reporting
354    Ok(VerifyReport {
355        total_lines: total,
356        ok_lines: ok,
357        first_break: None,
358    })
359}
360
361/// Split a chained line into `(body_without_hmac, hmac_hex)`. The
362/// HMAC is the last whitespace-separated column and is exactly 64
363/// lowercase hex characters. Returns `None` if the line doesn't end
364/// with a valid hex column of the expected length.
365fn split_hmac_suffix(line: &str) -> Option<(&str, &str)> {
366    if line.len() <= HMAC_HEX_LEN + 1 {
367        return None;
368    }
369    let cut = line.len() - HMAC_HEX_LEN;
370    let body = &line[..cut];
371    let hmac = &line[cut..];
372    // body must end with a single space separator.
373    if !body.ends_with(' ') {
374        return None;
375    }
376    if hmac.len() != HMAC_HEX_LEN || !hmac.chars().all(|c| c.is_ascii_hexdigit()) {
377        return None;
378    }
379    // Drop the trailing space so the chain input matches the producer's
380    // (which appends ` <hex>\n` to the underlying line).
381    Some((&body[..body.len() - 1], hmac))
382}
383
// Unit tests for key parsing, the hex helpers, and chain verification.
// All verification tests run against in-memory buffers via
// `verify_audit_bytes`, so no files are touched.
#[cfg(test)]
mod tests {
    use super::*;

    /// 32-byte `raw:` key shared by the chain tests.
    fn key() -> AuditHmacKey {
        AuditHmacKey::from_str("raw:0123456789abcdef0123456789abcdef").unwrap()
    }

    /// `genesis_prev()` equals an independently computed SHA-256 of
    /// the label.
    #[test]
    fn genesis_is_sha256_of_label() {
        let g = genesis_prev();
        // SHA-256("S4-AUDIT-V1") — recomputed independently to lock
        // the constant down. Any change to the label is a wire break.
        let mut h = Sha256::new();
        h.update(b"S4-AUDIT-V1");
        let want = h.finalize();
        assert_eq!(&g[..], &want[..]);
    }

    /// Each of the `raw:`, `hex:` and `base64:` forms parses to a
    /// 32-byte key.
    #[test]
    fn key_parsing_accepts_three_prefixes() {
        let r = AuditHmacKey::from_str("raw:0123456789abcdef0123456789abcdef").unwrap();
        assert_eq!(r.as_bytes().len(), 32);
        let h = AuditHmacKey::from_str(
            "hex:0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
        )
        .unwrap();
        assert_eq!(h.as_bytes().len(), 32);
        // 32 zero bytes -> base64 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="
        let b = AuditHmacKey::from_str("base64:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=")
            .unwrap();
        assert_eq!(b.as_bytes(), &[0u8; 32]);
    }

    /// A 5-byte raw key is below the 16-byte minimum.
    #[test]
    fn key_parsing_rejects_short_keys() {
        let err = AuditHmacKey::from_str("raw:short").unwrap_err();
        assert!(matches!(err, AuditKeyError::TooShort(5)));
    }

    /// An unrecognised prefix is rejected as `BadPrefix`.
    #[test]
    fn key_parsing_rejects_bad_prefix() {
        let err = AuditHmacKey::from_str("plain:key").unwrap_err();
        assert!(matches!(err, AuditKeyError::BadPrefix(_)));
    }

    /// A correctly chained three-entry buffer verifies with no break.
    #[test]
    fn happy_path_chain_verifies() {
        let key = key();
        // Build a 3-line file by hand.
        let lines = ["line one alpha", "line two beta", "line three gamma"];
        let mut buf = String::new();
        let mut prev = genesis_prev();
        for ln in &lines {
            let mac = chain_step(&key, &prev, ln.as_bytes());
            buf.push_str(ln);
            buf.push(' ');
            buf.push_str(&hex_encode(&mac));
            buf.push('\n');
            prev = mac;
        }
        let report =
            verify_audit_bytes(std::path::Path::new("<mem>"), buf.as_bytes(), &key).unwrap();
        assert_eq!(report.total_lines, 3);
        assert_eq!(report.ok_lines, 3);
        assert!(report.first_break.is_none());
    }

    /// Editing the body of the second entry is reported as a break on
    /// line 2, with only line 1 counted as OK.
    #[test]
    fn tamper_one_byte_in_middle_breaks_at_that_line() {
        let key = key();
        let lines = ["line A", "line B middle", "line C tail"];
        let mut buf = String::new();
        let mut prev = genesis_prev();
        for ln in &lines {
            let mac = chain_step(&key, &prev, ln.as_bytes());
            buf.push_str(ln);
            buf.push(' ');
            buf.push_str(&hex_encode(&mac));
            buf.push('\n');
            prev = mac;
        }
        // Change the case of one word in the middle of line 2's body.
        let bad = buf.replace("middle", "MIDDLE");
        let report =
            verify_audit_bytes(std::path::Path::new("<mem>"), bad.as_bytes(), &key).unwrap();
        assert!(report.first_break.is_some(), "expected a break");
        let br = report.first_break.unwrap();
        assert_eq!(br.line_no, 2, "break should be on line 2");
        assert_eq!(report.ok_lines, 1, "line 1 OK before the break");
    }

    /// Changing one hex digit of the stored HMAC is reported as a
    /// break on that line.
    #[test]
    fn tamper_hmac_field_breaks_at_that_line() {
        let key = key();
        let line = "lonely line";
        let mac = chain_step(&key, &genesis_prev(), line.as_bytes());
        let s = format!("{} {}\n", line, hex_encode(&mac));
        // Flip a hex char in the HMAC suffix (penultimate byte; final
        // byte is '\n').
        let last = s.len() - 2;
        let c = s.as_bytes()[last];
        let new_c = if c == b'0' { '1' } else { '0' };
        let mut bad = String::with_capacity(s.len());
        bad.push_str(&s[..last]);
        bad.push(new_c);
        bad.push_str(&s[last + 1..]);
        let report =
            verify_audit_bytes(std::path::Path::new("<mem>"), bad.as_bytes(), &key).unwrap();
        let br = report.first_break.expect("expected break");
        assert_eq!(br.line_no, 1);
        // `c` still holds the original (pre-flip) character; this
        // statement is a no-op and nothing is asserted on it.
        let _ = c;
    }

    /// A line with no HMAC column reports a break whose `actual_hmac`
    /// is the "<missing>" marker.
    #[test]
    fn missing_hmac_column_reports_break_with_missing_marker() {
        let key = key();
        let s = "no hmac at all\n";
        let report =
            verify_audit_bytes(std::path::Path::new("<mem>"), s.as_bytes(), &key).unwrap();
        let br = report.first_break.expect("expected break");
        assert_eq!(br.actual_hmac, "<missing>");
    }

    /// A second buffer whose `# prev_file_tail=` comment carries the
    /// first buffer's final HMAC verifies as a continuation.
    #[test]
    fn cross_file_chain_via_prev_tail_comment() {
        let key = key();
        // First "file": one line, capture its tail.
        let line1 = "first file lone line";
        let mac1 = chain_step(&key, &genesis_prev(), line1.as_bytes());
        let f1 = format!("{} {}\n", line1, hex_encode(&mac1));
        let r1 =
            verify_audit_bytes(std::path::Path::new("<f1>"), f1.as_bytes(), &key).unwrap();
        assert!(r1.first_break.is_none());

        // Second "file": prev_file_tail comment, then one line whose
        // HMAC is computed from mac1 as its prev.
        let line2 = "second file lone line";
        let mac2 = chain_step(&key, &mac1, line2.as_bytes());
        let f2 = format!(
            "# prev_file_tail={}\n{} {}\n",
            hex_encode(&mac1),
            line2,
            hex_encode(&mac2)
        );
        let r2 =
            verify_audit_bytes(std::path::Path::new("<f2>"), f2.as_bytes(), &key).unwrap();
        assert!(r2.first_break.is_none(), "cross-file chain must verify");
        assert_eq!(r2.ok_lines, 1);
        assert_eq!(r2.total_lines, 2); // comment + entry
    }

    /// A `# prev_file_tail=` value that does not match the HMAC the
    /// entry was chained from produces a break.
    #[test]
    fn cross_file_chain_with_wrong_prev_tail_breaks() {
        let key = key();
        let line2 = "second file lone line";
        // Wrong prev: 32 zero bytes
        let wrong_prev = [0u8; 32];
        // But the producer wrote the HMAC computed from genesis (or
        // anything other than wrong_prev), so the verifier's recompute
        // will mismatch.
        let actual_mac = chain_step(&key, &genesis_prev(), line2.as_bytes());
        let f2 = format!(
            "# prev_file_tail={}\n{} {}\n",
            hex_encode(&wrong_prev),
            line2,
            hex_encode(&actual_mac)
        );
        let r =
            verify_audit_bytes(std::path::Path::new("<f2>"), f2.as_bytes(), &key).unwrap();
        assert!(r.first_break.is_some());
    }

    /// A well-formed line splits into body and 64-char HMAC column,
    /// with the separating space removed from the body.
    #[test]
    fn split_hmac_suffix_basic() {
        let hmac64 = "a".repeat(64);
        let s = format!("foo bar baz {hmac64}");
        let (body, hmac) = split_hmac_suffix(&s).unwrap();
        assert_eq!(body, "foo bar baz");
        assert_eq!(hmac.len(), 64);
        assert_eq!(hmac, hmac64.as_str());
    }

    /// Lines that are too short, or whose last 64 chars are not hex,
    /// are rejected.
    #[test]
    fn split_hmac_suffix_rejects_short_or_nonhex() {
        assert!(split_hmac_suffix("short").is_none());
        // 64 chars but contains 'g' (not hex) — produce a 64-char
        // non-hex suffix to keep the length right.
        let bad_hmac = "g".repeat(64);
        let bad = format!("x {bad_hmac}");
        assert!(split_hmac_suffix(&bad).is_none());
    }

    /// `hex_encode` output is lowercase and `hex_decode` inverts it.
    #[test]
    fn hex_roundtrip() {
        let raw = [0u8, 1, 2, 0xff, 0x10, 0xab];
        let s = hex_encode(&raw);
        assert_eq!(s, "000102ff10ab");
        let dec = hex_decode(&s).unwrap();
        assert_eq!(dec, raw);
    }
}